avocet/web/src/views/BenchmarkView.vue
pyr0ball 7c304ebc45 feat: benchmark model picker, category grouping, stats benchmark results
Backend (app/api.py):
- GET /api/benchmark/models — returns installed models grouped by adapter
  type (ZeroShotAdapter, RerankerAdapter, GenerationAdapter, Unknown);
  reads _MODELS_DIR via app.models so test overrides are respected
- GET /api/benchmark/run — add model_names query param (comma-separated);
  when set, passes --models <names...> to benchmark_classifier.py
- GET /api/stats — add benchmark_results field from benchmark_results.json

Frontend:
- BenchmarkView: collapsible Model Selection panel with per-category
  checkboxes, select-all per category (supports indeterminate state),
  collapsed summary badge ("All models (N)" or "N of M selected");
  model_names only sent when a strict subset is selected
- StatsView: Benchmark Results table (accuracy, macro_f1, weighted_f1)
  with best-model highlighting per metric; hidden when no results exist
2026-04-08 23:03:56 -07:00

1095 lines
33 KiB
Vue
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<template>
<div class="bench-view">
<!-- Page header: title on the left; slow-model toggle, run / re-run and cancel controls on the right. -->
<header class="bench-header">
  <h1 class="page-title">🏁 Benchmark</h1>
  <div class="header-actions">
    <label class="slow-toggle" :class="{ disabled: running }">
      <input type="checkbox" v-model="includeSlow" :disabled="running" />
      Include slow models
    </label>
    <button class="btn-run" :disabled="running" @click="startBenchmark">
      {{ running ? '⏳ Running…' : results ? '🔄 Re-run' : '▶ Run Benchmark' }}
    </button>
    <!-- Cancel is only offered while a run is in flight. -->
    <button v-if="running" class="btn-cancel" @click="cancelBenchmark">
      ✕ Cancel
    </button>
  </div>
</header>
<!-- Model Picker -->
<details class="model-picker" ref="pickerEl">
  <summary class="picker-summary">
    <span class="picker-title">🎯 Model Selection</span>
    <span class="picker-badge">{{ pickerSummaryText }}</span>
  </summary>
  <div class="picker-body">
    <!-- Exactly one of these renders: loading notice, empty notice, or the category list. -->
    <div v-if="modelsLoading" class="picker-loading">Loading models…</div>
    <div v-else-if="Object.keys(modelCategories).length === 0" class="picker-empty">
      No models found — check API connection.
    </div>
    <template v-else>
      <div
        v-for="(models, category) in modelCategories"
        :key="category"
        class="picker-category"
      >
        <!-- Category checkbox: checked when every model is selected, indeterminate when only some are. -->
        <label class="picker-cat-header">
          <input
            type="checkbox"
            :checked="isCategoryAllSelected(models)"
            :indeterminate="isCategoryIndeterminate(models)"
            @change="toggleCategory(models, ($event.target as HTMLInputElement).checked)"
          />
          <span class="picker-cat-name">{{ category }}</span>
          <span class="picker-cat-count">({{ models.length }})</span>
        </label>
        <div v-if="models.length === 0" class="picker-no-models">No models installed</div>
        <div v-else class="picker-model-list">
          <label v-for="m in models" :key="m.name" class="picker-model-row">
            <input
              type="checkbox"
              :checked="selectedModels.has(m.name)"
              @change="toggleModel(m.name, ($event.target as HTMLInputElement).checked)"
            />
            <span class="picker-model-name" :title="m.repo_id ?? m.name">{{ m.name }}</span>
            <span class="picker-adapter-type">{{ m.adapter_type }}</span>
          </label>
        </div>
      </div>
    </template>
  </div>
</details>
<!-- Trained models badge row -->
<!-- One pill per fine-tuned model; the tooltip shows base model and sample count when the base model is known. -->
<div v-if="fineTunedModels.length > 0" class="trained-models-row">
  <span class="trained-label">Trained:</span>
  <span
    v-for="m in fineTunedModels"
    :key="m.name"
    class="trained-badge"
    :title="m.base_model_id ? `Base: ${m.base_model_id} · ${m.sample_count ?? '?'} samples` : m.name"
  >
    {{ m.name }}
    <span v-if="m.val_macro_f1 != null" class="trained-f1">
      F1 {{ (m.val_macro_f1 * 100).toFixed(1) }}%
    </span>
  </span>
</div>
<!-- Progress log -->
<!--
  Benchmark progress panel. It is shown while a run is active, while there are
  log lines, or while an error is pending. Including runError matters: a run
  that fails before emitting any progress line would otherwise never show its
  error. The fine-tune log panel uses the same three-part condition.
-->
<div v-if="running || runLog.length || runError" class="run-log">
  <div class="run-log-title">
    <span>{{ running ? '⏳ Running benchmark…' : runCancelled ? '⏹ Cancelled' : runError ? '❌ Failed' : '✅ Done' }}</span>
    <button type="button" class="btn-ghost" @click="runLog = []; runError = ''; runCancelled = false">Clear</button>
  </div>
  <div class="log-lines" ref="logEl">
    <div
      v-for="(line, i) in runLog"
      :key="i"
      class="log-line"
      :class="{ 'log-error': line.startsWith('ERROR') || line.startsWith('[error]') }"
    >{{ line }}</div>
  </div>
  <p v-if="runError" class="run-error">{{ runError }}</p>
</div>
<!-- Loading -->
<!-- First two states of the results area: results being fetched, then the empty state. -->
<div v-if="loading" class="status-notice">Loading…</div>
<!-- No results yet -->
<div v-else-if="!results" class="status-notice empty">
  <p>No benchmark results yet.</p>
  <p class="hint">Click <strong>Run Benchmark</strong> to score all default models against your labeled data.</p>
</div>
<!-- Results -->
<template v-else>
<!-- Summary of the loaded run: sample count · model count · timestamp. -->
<p class="meta-line">
  <span>{{ results.sample_count.toLocaleString() }} labeled emails</span>
  <span class="sep">·</span>
  <span>{{ modelCount }} model{{ modelCount === 1 ? '' : 's' }}</span>
  <span class="sep">·</span>
  <span>{{ formatDate(results.timestamp) }}</span>
</p>
<!-- Macro-F1 chart -->
<section class="chart-section">
  <h2 class="chart-title">Macro-F1 (higher = better)</h2>
  <!-- One row per model, best first; bar width is relative to the top score, colour follows scoreColor(). -->
  <div class="bar-chart">
    <div v-for="row in f1Rows" :key="row.name" class="bar-row">
      <span class="bar-label" :title="row.name">{{ row.name }}</span>
      <div class="bar-track">
        <div class="bar-fill" :style="{ width: `${row.pct}%`, background: scoreColor(row.value) }" />
      </div>
      <span class="bar-value" :style="{ color: scoreColor(row.value) }">
        {{ row.value.toFixed(3) }}
      </span>
    </div>
  </div>
</section>
<!-- Latency chart -->
<section class="chart-section">
  <h2 class="chart-title">Latency (ms / email, lower = better)</h2>
  <!-- One row per model, fastest first; bar width is relative to the slowest model. -->
  <div class="bar-chart">
    <div v-for="row in latencyRows" :key="row.name" class="bar-row">
      <span class="bar-label" :title="row.name">{{ row.name }}</span>
      <div class="bar-track">
        <div class="bar-fill latency-fill" :style="{ width: `${row.pct}%` }" />
      </div>
      <span class="bar-value">{{ row.value.toFixed(1) }} ms</span>
    </div>
  </div>
</section>
<!-- Per-label F1 heatmap -->
<!--
  Per-label F1 heatmap: one row per label, one column per model.
  Each cell's tooltip carries the exact F1 and the support count; the hint
  below the table describes exactly that, and its legend bands match the
  thresholds used by heatmapBg() (>= 0.7, 0.4–0.7, < 0.4).
-->
<section class="chart-section">
  <h2 class="chart-title">Per-label F1</h2>
  <div class="heatmap-scroll">
    <table class="heatmap">
      <thead>
        <tr>
          <th scope="col" class="hm-label-col">Label</th>
          <th v-for="name in modelNames" :key="name" scope="col" class="hm-model-col" :title="name">
            {{ name }}
          </th>
        </tr>
      </thead>
      <tbody>
        <tr v-for="label in labelNames" :key="label">
          <td class="hm-label-cell">
            <span class="hm-emoji">{{ LABEL_META[label]?.emoji ?? '🏷️' }}</span>
            {{ label.replace(/_/g, '\u00a0') }}
          </td>
          <td
            v-for="name in modelNames"
            :key="name"
            class="hm-value-cell"
            :style="{ background: heatmapBg(f1For(name, label)), color: heatmapFg(f1For(name, label)) }"
            :title="`${name} / ${label}: F1 ${f1For(name, label).toFixed(3)}, support ${supportFor(name, label)}`"
          >
            {{ f1For(name, label).toFixed(2) }}
          </td>
        </tr>
      </tbody>
    </table>
  </div>
  <p class="heatmap-hint">Hover a cell for exact F1 and support. Color: 🟢 ≥ 0.7 · 🟡 0.4–0.7 · 🔴 &lt; 0.4</p>
</section>
</template>
<!-- Fine-tune section -->
<!--
  Collapsible fine-tune panel: pick a base model and epoch count, start or
  cancel training, and follow the streamed training log.
-->
<details class="ft-section">
  <summary class="ft-summary">Fine-tune a model</summary>
  <div class="ft-body">
    <div class="ft-controls">
      <label class="ft-field">
        <span class="ft-field-label">Model</span>
        <select v-model="ftModel" class="ft-select" :disabled="ftRunning">
          <option value="deberta-small">deberta-small (100M, fast)</option>
          <option value="bge-m3">bge-m3 (600M — stop Peregrine vLLM first)</option>
        </select>
      </label>
      <label class="ft-field">
        <span class="ft-field-label">Epochs</span>
        <input
          v-model.number="ftEpochs"
          type="number" min="1" max="20"
          class="ft-epochs"
          :disabled="ftRunning"
        />
      </label>
      <button type="button" class="btn-run ft-run-btn" :disabled="ftRunning" @click="startFinetune">
        {{ ftRunning ? '⏳ Training…' : '▶ Run fine-tune' }}
      </button>
      <!-- Cancel is only offered while training is in flight. -->
      <button v-if="ftRunning" type="button" class="btn-cancel" @click="cancelFinetune">
        Cancel
      </button>
    </div>
    <!-- Shown while training, while log lines exist, or while an error is pending. -->
    <div v-if="ftRunning || ftLog.length || ftError" class="run-log ft-log">
      <div class="run-log-title">
        <span>{{ ftRunning ? '⏳ Training…' : ftCancelled ? '⏹ Cancelled' : ftError ? '❌ Failed' : '✅ Done' }}</span>
        <button type="button" class="btn-ghost" @click="ftLog = []; ftError = ''; ftCancelled = false">Clear</button>
      </div>
      <div class="log-lines" ref="ftLogEl">
        <div
          v-for="(line, i) in ftLog"
          :key="i"
          class="log-line"
          :class="{ 'log-error': line.startsWith('ERROR') || line.startsWith('[error]') }"
        >{{ line }}</div>
      </div>
      <p v-if="ftError" class="run-error">{{ ftError }}</p>
    </div>
  </div>
</details>
</div>
</template>
<script setup lang="ts">
import { ref, computed, onMounted, nextTick } from 'vue'
import { useApiFetch, useApiSSE } from '../composables/useApi'
// ── Label metadata (same as StatsView) ──────────────────────────────────────
// Emoji shown next to each known label in the per-label heatmap.
// Key order matters: `labelNames` lists these labels first, in this order,
// and appends any label found in the results but missing here afterwards.
// Labels without an entry fall back to '🏷️' in the template.
const LABEL_META: Record<string, { emoji: string }> = {
interview_scheduled: { emoji: '🗓️' },
offer_received: { emoji: '🎉' },
rejected: { emoji: '❌' },
positive_response: { emoji: '👍' },
survey_received: { emoji: '📋' },
neutral: { emoji: '⬜' },
event_rescheduled: { emoji: '🔄' },
digest: { emoji: '📰' },
new_lead: { emoji: '🤝' },
hired: { emoji: '🎊' },
}
// ── Types ────────────────────────────────────────────────────────────────────
// One model entry as returned by GET /api/benchmark/models.
interface AvailableModel {
name: string
// Optional; only used as the hover title in the picker (falls back to `name`).
repo_id?: string
// Rendered verbatim as a small tag beside the model name.
adapter_type: string
}
// Response body of GET /api/benchmark/models: category name → models in that category.
interface ModelCategoriesResponse {
categories: Record<string, AvailableModel[]>
}
// One entry of the array returned by GET /api/finetune/status.
interface FineTunedModel {
name: string
base_model_id?: string
// Multiplied by 100 for display, so presumably a 0–1 fraction — confirm against the API.
val_macro_f1?: number
// Not read by the visible code in this view.
timestamp?: string
sample_count?: number
}
// Per-label metrics; this view reads `f1` and `support` only.
interface PerLabel { f1: number; precision: number; recall: number; support: number }
// Metrics for a single model in a benchmark run.
interface ModelResult {
macro_f1: number
// Not read by the visible code in this view.
accuracy: number
// Charted under the heading "ms / email".
latency_ms: number
// Keyed by label name.
per_label: Record<string, PerLabel>
}
// Response body of GET /api/benchmark/results.
interface BenchResults {
// Passed to formatDate(); null is rendered as 'unknown date'.
timestamp: string | null
// Rendered as "<n> labeled emails".
sample_count: number
// Keyed by model name.
models: Record<string, ModelResult>
}
// ── State ────────────────────────────────────────────────────────────────────
// Benchmark results state.
// `results` stays null until loadResults() receives a payload with at least one model.
const results = ref<BenchResults | null>(null)
// True while loadResults() is in flight; starts true so the first render shows "Loading…".
const loading = ref(true)
// True from startBenchmark() until the stream completes, errors, or is cancelled.
const running = ref(false)
// Progress messages received from the benchmark stream, in arrival order.
const runLog = ref<string[]>([])
const runError = ref('')
// Sent as include_slow=true on the run request when checked.
const includeSlow = ref(false)
// Template ref to the benchmark log container; used to auto-scroll to the newest line.
const logEl = ref<HTMLElement | null>(null)
// Model picker state
// Category name → models, as returned by /api/benchmark/models.
const modelCategories = ref<Record<string, AvailableModel[]>>({})
// Names of the ticked models. The toggle helpers replace this with a new Set rather than mutating it.
const selectedModels = ref<Set<string>>(new Set())
// Flat list of every model name across all categories.
const allModels = ref<string[]>([])
const modelsLoading = ref(false)
// Template ref on the picker <details>; not read elsewhere in the visible script.
const pickerEl = ref<HTMLDetailsElement | null>(null)
// Fine-tune state
const fineTunedModels = ref<FineTunedModel[]>([])
const ftModel = ref('deberta-small')
const ftEpochs = ref(5)
const ftRunning = ref(false)
const ftLog = ref<string[]>([])
const ftError = ref('')
// Template ref to the fine-tune log container; used to auto-scroll to the newest line.
const ftLogEl = ref<HTMLElement | null>(null)
// Set when the corresponding stream reports a 'cancelled' event; reset on the next start or by "Clear".
const runCancelled = ref(false)
const ftCancelled = ref(false)
// Ask the backend to stop the running benchmark. Best-effort: a failed
// request is ignored.
async function cancelBenchmark() {
  try {
    await fetch('/api/benchmark/cancel', { method: 'POST' })
  } catch {
    // Deliberately ignored — there is nothing useful to do if the cancel request fails.
  }
}
// Ask the backend to stop the running fine-tune. Best-effort: a failed
// request is ignored.
async function cancelFinetune() {
  try {
    await fetch('/api/finetune/cancel', { method: 'POST' })
  } catch {
    // Deliberately ignored — there is nothing useful to do if the cancel request fails.
  }
}
// ── Model picker computed ─────────────────────────────────────────────────────
// Badge text for the picker summary: a "none available" notice, an
// "all selected" total, or an "N of M" count.
const pickerSummaryText = computed(() => {
  const available = allModels.value.length
  if (available === 0) return 'No models available'
  const chosen = selectedModels.value.size
  return chosen === available
    ? `All models (${available})`
    : `${chosen} of ${available} selected`
})
// True when the category is non-empty and every one of its models is selected.
function isCategoryAllSelected(models: AvailableModel[]): boolean {
  if (models.length === 0) return false
  for (const { name } of models) {
    if (!selectedModels.value.has(name)) return false
  }
  return true
}
// True when some, but not all, of the category's models are selected.
function isCategoryIndeterminate(models: AvailableModel[]): boolean {
  let picked = 0
  for (const { name } of models) {
    if (selectedModels.value.has(name)) picked += 1
  }
  return picked > 0 && picked < models.length
}
// Select or deselect a single model. The selection is replaced with a fresh
// Set rather than mutated in place.
function toggleModel(name: string, checked: boolean) {
  const updated = new Set(selectedModels.value)
  if (!checked) {
    updated.delete(name)
  } else {
    updated.add(name)
  }
  selectedModels.value = updated
}
// Select or deselect every model of a category in one step; models outside
// the category keep their current state.
function toggleCategory(models: AvailableModel[], checked: boolean) {
  const updated = new Set(selectedModels.value)
  const names = models.map(m => m.name)
  if (checked) {
    names.forEach(n => updated.add(n))
  } else {
    names.forEach(n => updated.delete(n))
  }
  selectedModels.value = updated
}
// Fetch the installed models grouped by category and select all of them.
//
// The flat name list is de-duplicated because its length is compared with
// `selectedModels.size` (a Set) to decide whether "all" models are selected.
// A name listed under two categories would otherwise make the picker report
// a partial selection forever and always send an explicit model list.
//
// The loading flag is cleared in `finally` so the picker cannot get stuck on
// "Loading models…" if the fetch helper throws.
async function loadModelCategories() {
  modelsLoading.value = true
  try {
    const { data } = await useApiFetch<ModelCategoriesResponse>('/api/benchmark/models')
    if (!data?.categories) return
    modelCategories.value = data.categories
    const names = [...new Set(Object.values(data.categories).flat().map(m => m.name))]
    allModels.value = names
    selectedModels.value = new Set(names)
  } finally {
    modelsLoading.value = false
  }
}
// ── Derived ──────────────────────────────────────────────────────────────────
// Model names in the order they appear in the results payload (empty when no results are loaded).
const modelNames = computed(() => {
  const models = results.value?.models
  return models ? Object.keys(models) : []
})
const modelCount = computed(() => modelNames.value.length)
// Heatmap row order: labels known to LABEL_META first (in LABEL_META order),
// then any other label found in the results, in first-seen order.
const labelNames = computed(() => {
  const seen = new Set<string>()
  for (const model of modelNames.value) {
    for (const label of Object.keys(results.value!.models[model].per_label)) {
      seen.add(label)
    }
  }
  const known = Object.keys(LABEL_META)
  const ordered = known.filter(label => seen.has(label))
  for (const label of seen) {
    if (!known.includes(label)) ordered.push(label)
  }
  return ordered
})
// Rows for the Macro-F1 chart, best model first. `pct` is the bar width
// relative to the best score (falls back to 1 when the best score is 0 or absent).
const f1Rows = computed(() => {
  const current = results.value
  if (!current) return []
  const ranked = modelNames.value
    .map(name => ({ name, value: current.models[name].macro_f1 }))
    .sort((a, b) => b.value - a.value)
  const best = ranked.length > 0 ? ranked[0].value || 1 : 1
  return ranked.map(row => ({ ...row, pct: Math.round((row.value / best) * 100) }))
})
// Rows for the latency chart, fastest model first. `pct` is the bar width
// relative to the slowest model (falls back to 1 when that latency is 0 or absent).
const latencyRows = computed(() => {
  const current = results.value
  if (!current) return []
  const byLatency = modelNames.value
    .map(name => ({ name, value: current.models[name].latency_ms }))
    .sort((a, b) => a.value - b.value)
  const slowest = byLatency.length > 0 ? byLatency[byLatency.length - 1].value || 1 : 1
  return byLatency.map(row => ({ ...row, pct: Math.round((row.value / slowest) * 100) }))
})
// ── Helpers ──────────────────────────────────────────────────────────────────
// F1 of `label` for `model`; 0 when results, model, or label entry is missing.
function f1For(model: string, label: string): number {
  const entry = results.value?.models[model]?.per_label[label]
  return entry?.f1 ?? 0
}
// Support count of `label` for `model`; 0 when results, model, or label entry is missing.
function supportFor(model: string, label: string): number {
  const entry = results.value?.models[model]?.per_label[label]
  return entry?.support ?? 0
}
// Colour band for a score: success at >= 0.7, accent at >= 0.4, error below.
function scoreColor(v: number): string {
  return v >= 0.7
    ? 'var(--color-success, #4CAF50)'
    : v >= 0.4
      ? 'var(--app-accent, #B8622A)'
      : 'var(--color-error, #ef4444)'
}
// Background colour for a heatmap cell, as a CSS color-mix() expression.
//
// Bands: green for v >= 0.7, orange for 0.4 <= v < 0.7, red below 0.4; the
// mix weight grows with v inside each band.
//
// Every percentage is clamped to 0–100. color-mix() rejects a percentage
// outside that range, which makes the whole declaration invalid and leaves
// the cell without a background. The unclamped red formula (v * 200 + 30)
// exceeds 100 for v from about 0.3525 up to the 0.4 band edge.
function heatmapBg(v: number): string {
  const pct = (raw: number): number => Math.min(100, Math.max(0, Math.round(raw)))
  if (v >= 0.7) return `color-mix(in srgb, #4CAF50 ${pct(v * 100)}%, #1a2338 ${pct((1 - v) * 80)}%)`
  if (v >= 0.4) return `color-mix(in srgb, #FF9800 ${pct(v * 120)}%, #1a2338 40%)`
  return `color-mix(in srgb, #ef4444 ${pct(v * 200 + 30)}%, #1a2338 60%)`
}
// Text colour for a heatmap cell: solid white at v >= 0.5, slightly
// translucent white otherwise.
function heatmapFg(v: number): string {
  if (v >= 0.5) {
    return '#fff'
  }
  return 'rgba(255,255,255,0.75)'
}
// Format an ISO timestamp for the meta line using the viewer's locale.
//
// Returns 'unknown date' when the timestamp is missing, empty, or cannot be
// parsed. Without the parse check, an unparseable string would be rendered
// as the literal text "Invalid Date".
function formatDate(iso: string | null): string {
  if (!iso) return 'unknown date'
  const d = new Date(iso)
  if (Number.isNaN(d.getTime())) return 'unknown date'
  return d.toLocaleString(undefined, { dateStyle: 'medium', timeStyle: 'short' })
}
// ── Data loading ─────────────────────────────────────────────────────────────
// Fetch the latest benchmark results.
//
// `results` is only replaced when the payload contains at least one model,
// so an empty or malformed response keeps whatever is currently displayed.
// The `data?.models` check stops a payload without a `models` object from
// throwing inside Object.keys(). The `finally` block makes sure the
// "Loading…" notice is cleared even if the fetch helper throws.
async function loadResults() {
  loading.value = true
  try {
    const { data } = await useApiFetch<BenchResults>('/api/benchmark/results')
    if (data?.models && Object.keys(data.models).length > 0) {
      results.value = data
    }
  } finally {
    loading.value = false
  }
}
// ── Benchmark run ─────────────────────────────────────────────────────────────
// Start a benchmark run and stream its progress into the run log.
//
// Does nothing when a run is already in progress. The Run button is disabled
// while running, but this function is also called programmatically after a
// fine-tune completes; without the guard that call could open a second
// stream that writes into the same log and state.
//
// Query parameters:
//   include_slow=true  when the "Include slow models" box is ticked
//   model_names=a,b,c  only when a strict subset of the models is ticked.
//                      With everything or nothing ticked the parameter is
//                      omitted (presumably the backend then runs its default
//                      set — confirm against the API).
function startBenchmark() {
  if (running.value) return
  running.value = true
  runLog.value = []
  runError.value = ''
  runCancelled.value = false

  const params = new URLSearchParams()
  if (includeSlow.value) params.set('include_slow', 'true')
  // Only send model_names when a subset is selected (not all, not none)
  const total = allModels.value.length
  const selected = selectedModels.value.size
  if (total > 0 && selected > 0 && selected < total) {
    params.set('model_names', [...selectedModels.value].join(','))
  }
  const qs = params.toString()
  const url = `/api/benchmark/run${qs ? `?${qs}` : ''}`

  useApiSSE(
    url,
    // Per-event handler.
    async (event) => {
      if (event.type === 'progress' && typeof event.message === 'string') {
        runLog.value.push(event.message)
        // Wait for the new line to render before scrolling it into view.
        await nextTick()
        logEl.value?.scrollTo({ top: logEl.value.scrollHeight, behavior: 'smooth' })
      }
      if (event.type === 'error' && typeof event.message === 'string') {
        runError.value = event.message
      }
      if (event.type === 'cancelled') {
        running.value = false
        runCancelled.value = true
      }
    },
    // Stream finished: refresh the charts from the stored results.
    async () => {
      running.value = false
      await loadResults()
    },
    // Stream failed: keep a server-reported error if there is one.
    () => {
      running.value = false
      if (!runError.value) runError.value = 'Connection lost'
    },
  )
}
// Refresh the list of fine-tuned models; anything other than an array
// response leaves the current list untouched.
async function loadFineTunedModels() {
  const response = await useApiFetch<FineTunedModel[]>('/api/finetune/status')
  if (!Array.isArray(response.data)) return
  fineTunedModels.value = response.data
}
// Start a fine-tune run and stream its progress into the fine-tune log.
//
// Does nothing when a fine-tune is already in progress.
//
// The epoch count is normalised before it is sent. The number input can hold
// an empty or out-of-range value (its min/max attributes do not stop typing),
// so the value is rounded and clamped to the 1–20 range the input advertises.
// A non-numeric value falls back to 5, the field's initial value. The
// normalised value is written back so the field shows what was actually used.
//
// When the stream finishes, the fine-tuned model list is refreshed and a
// benchmark is started to refresh the charts. That only happens if the run
// neither reported an error nor was cancelled, because there is no new model
// to score in those cases.
function startFinetune() {
  if (ftRunning.value) return

  const requested = Number(ftEpochs.value)
  const epochs = Number.isFinite(requested)
    ? Math.min(20, Math.max(1, Math.round(requested)))
    : 5
  ftEpochs.value = epochs

  ftRunning.value = true
  ftLog.value = []
  ftError.value = ''
  ftCancelled.value = false

  const params = new URLSearchParams({ model: ftModel.value, epochs: String(epochs) })
  useApiSSE(
    `/api/finetune/run?${params}`,
    // Per-event handler.
    async (event) => {
      if (event.type === 'progress' && typeof event.message === 'string') {
        ftLog.value.push(event.message)
        // Wait for the new line to render before scrolling it into view.
        await nextTick()
        ftLogEl.value?.scrollTo({ top: ftLogEl.value.scrollHeight, behavior: 'smooth' })
      }
      if (event.type === 'error' && typeof event.message === 'string') {
        ftError.value = event.message
      }
      if (event.type === 'cancelled') {
        ftRunning.value = false
        ftCancelled.value = true
      }
    },
    // Stream finished.
    async () => {
      ftRunning.value = false
      await loadFineTunedModels()
      // Auto-trigger a benchmark to refresh the charts, but only after a clean run.
      if (!ftError.value && !ftCancelled.value) startBenchmark()
    },
    // Stream failed: keep a server-reported error if there is one.
    () => {
      ftRunning.value = false
      if (!ftError.value) ftError.value = 'Connection lost'
    },
  )
}
// Kick off the three initial fetches when the view mounts: stored benchmark
// results, the fine-tuned model list, and the model picker categories.
// None of the calls is awaited, so one does not wait for another to finish.
onMounted(() => {
loadResults()
loadFineTunedModels()
loadModelCategories()
})
</script>
<style scoped>
.bench-view {
max-width: 860px;
margin: 0 auto;
padding: 1.5rem 1rem 4rem;
display: flex;
flex-direction: column;
gap: 1.75rem;
}
.bench-header {
display: flex;
align-items: center;
justify-content: space-between;
flex-wrap: wrap;
gap: 0.75rem;
}
.page-title {
font-family: var(--font-display, var(--font-body, sans-serif));
font-size: 1.4rem;
font-weight: 700;
color: var(--app-primary, #2A6080);
margin: 0;
}
.header-actions {
display: flex;
align-items: center;
gap: 0.75rem;
flex-wrap: wrap;
}
.slow-toggle {
display: flex;
align-items: center;
gap: 0.4rem;
font-size: 0.85rem;
color: var(--color-text-secondary, #6b7a99);
cursor: pointer;
user-select: none;
}
.slow-toggle.disabled { opacity: 0.5; pointer-events: none; }
.btn-run {
padding: 0.45rem 1.1rem;
border-radius: 0.375rem;
border: none;
background: var(--app-primary, #2A6080);
color: #fff;
font-size: 0.88rem;
font-family: var(--font-body, sans-serif);
cursor: pointer;
transition: opacity 0.15s;
}
.btn-run:disabled { opacity: 0.5; cursor: not-allowed; }
.btn-run:not(:disabled):hover { opacity: 0.85; }
.btn-cancel {
padding: 0.45rem 0.9rem;
background: transparent;
border: 1px solid var(--color-text-secondary, #6b7a99);
color: var(--color-text-secondary, #6b7a99);
border-radius: 0.4rem;
font-size: 0.85rem;
font-weight: 500;
cursor: pointer;
transition: background 0.15s;
}
.btn-cancel:hover {
background: color-mix(in srgb, var(--color-text-secondary, #6b7a99) 12%, transparent);
}
/* ── Run log ────────────────────────────────────────────── */
.run-log {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
overflow: hidden;
font-family: var(--font-mono, monospace);
font-size: 0.78rem;
}
.run-log-title {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.4rem 0.75rem;
background: var(--color-surface-raised, #e4ebf5);
border-bottom: 1px solid var(--color-border, #d0d7e8);
font-size: 0.8rem;
color: var(--color-text-secondary, #6b7a99);
}
.btn-ghost {
background: none;
border: none;
color: var(--color-text-secondary, #6b7a99);
cursor: pointer;
font-size: 0.78rem;
padding: 0.1rem 0.3rem;
border-radius: 0.2rem;
}
.btn-ghost:hover { background: var(--color-border, #d0d7e8); }
.log-lines {
max-height: 200px;
overflow-y: auto;
padding: 0.5rem 0.75rem;
background: var(--color-surface, #fff);
display: flex;
flex-direction: column;
gap: 0.1rem;
}
.log-line { color: var(--color-text, #1a2338); line-height: 1.5; }
.log-line.log-error { color: var(--color-error, #ef4444); }
.run-error {
margin: 0;
padding: 0.4rem 0.75rem;
background: color-mix(in srgb, var(--color-error, #ef4444) 10%, transparent);
color: var(--color-error, #ef4444);
font-size: 0.82rem;
font-family: var(--font-mono, monospace);
}
/* ── Status notices ─────────────────────────────────────── */
.status-notice {
color: var(--color-text-secondary, #6b7a99);
font-size: 0.9rem;
padding: 1rem;
}
.status-notice.empty {
display: flex;
flex-direction: column;
align-items: center;
gap: 0.5rem;
padding: 3rem 1rem;
text-align: center;
}
.hint { font-size: 0.85rem; opacity: 0.75; }
/* ── Meta line ──────────────────────────────────────────── */
.meta-line {
display: flex;
gap: 0.5rem;
align-items: center;
font-size: 0.85rem;
color: var(--color-text-secondary, #6b7a99);
font-family: var(--font-mono, monospace);
flex-wrap: wrap;
}
.sep { opacity: 0.4; }
/* ── Chart sections ─────────────────────────────────────── */
.chart-section {
display: flex;
flex-direction: column;
gap: 0.75rem;
}
.chart-title {
font-size: 0.95rem;
font-weight: 600;
color: var(--color-text, #1a2338);
margin: 0;
}
/* ── Bar charts ─────────────────────────────────────────── */
.bar-chart {
display: flex;
flex-direction: column;
gap: 0.4rem;
}
.bar-row {
display: grid;
grid-template-columns: 14rem 1fr 5rem;
align-items: center;
gap: 0.5rem;
font-size: 0.82rem;
}
.bar-label {
font-family: var(--font-mono, monospace);
font-size: 0.76rem;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
color: var(--color-text, #1a2338);
}
.bar-track {
height: 16px;
background: var(--color-surface-raised, #e4ebf5);
border-radius: 99px;
overflow: hidden;
}
.bar-fill {
height: 100%;
border-radius: 99px;
transition: width 0.5s cubic-bezier(0.16, 1, 0.3, 1);
}
.latency-fill { background: var(--app-primary, #2A6080); opacity: 0.65; }
.bar-value {
text-align: right;
font-family: var(--font-mono, monospace);
font-size: 0.8rem;
font-variant-numeric: tabular-nums;
}
/* ── Heatmap ────────────────────────────────────────────── */
.heatmap-scroll {
overflow-x: auto;
border-radius: 0.5rem;
border: 1px solid var(--color-border, #d0d7e8);
}
.heatmap {
border-collapse: collapse;
min-width: 100%;
font-size: 0.78rem;
}
.hm-label-col {
text-align: left;
min-width: 11rem;
padding: 0.4rem 0.6rem;
background: var(--color-surface-raised, #e4ebf5);
font-weight: 600;
border-bottom: 1px solid var(--color-border, #d0d7e8);
position: sticky;
left: 0;
}
.hm-model-col {
min-width: 5rem;
max-width: 8rem;
padding: 0.4rem 0.5rem;
background: var(--color-surface-raised, #e4ebf5);
border-bottom: 1px solid var(--color-border, #d0d7e8);
font-family: var(--font-mono, monospace);
font-size: 0.7rem;
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
text-align: center;
}
/*
 * Sticky first column of the heatmap body.
 * z-index keeps it above value cells that scroll underneath it. A hovered
 * value cell gets a `filter`, which creates a stacking context; because that
 * cell comes later in the row, it would otherwise paint over this one.
 */
.hm-label-cell {
  padding: 0.35rem 0.6rem;
  background: var(--color-surface, #fff);
  border-top: 1px solid var(--color-border, #d0d7e8);
  white-space: nowrap;
  font-family: var(--font-mono, monospace);
  font-size: 0.74rem;
  position: sticky;
  left: 0;
  z-index: 1;
}
.hm-emoji { margin-right: 0.3rem; }
.hm-value-cell {
padding: 0.35rem 0.5rem;
text-align: center;
font-family: var(--font-mono, monospace);
font-variant-numeric: tabular-nums;
border-top: 1px solid rgba(255,255,255,0.08);
cursor: default;
transition: filter 0.15s;
}
.hm-value-cell:hover { filter: brightness(1.15); }
.heatmap-hint {
font-size: 0.75rem;
color: var(--color-text-secondary, #6b7a99);
margin: 0;
}
/* ── Mobile tweaks ──────────────────────────────────────── */
@media (max-width: 600px) {
.bar-row { grid-template-columns: 9rem 1fr 4rem; }
.bar-label { font-size: 0.7rem; }
.bench-header { flex-direction: column; align-items: flex-start; }
}
/* ── Trained models badge row ──────────────────────────── */
.trained-models-row {
display: flex;
flex-wrap: wrap;
align-items: center;
gap: 0.5rem;
padding: 0.6rem 0.75rem;
background: var(--color-surface-raised, #e4ebf5);
border-radius: 0.5rem;
border: 1px solid var(--color-border, #d0d7e8);
}
.trained-label {
font-size: 0.75rem;
font-weight: 700;
color: var(--color-text-secondary, #6b7a99);
text-transform: uppercase;
letter-spacing: 0.05em;
flex-shrink: 0;
}
.trained-badge {
display: inline-flex;
align-items: center;
gap: 0.35rem;
padding: 0.2rem 0.55rem;
background: var(--app-primary, #2A6080);
color: #fff;
border-radius: 1rem;
font-family: var(--font-mono, monospace);
font-size: 0.76rem;
cursor: default;
}
.trained-f1 {
background: rgba(255,255,255,0.2);
border-radius: 0.75rem;
padding: 0.05rem 0.35rem;
font-size: 0.7rem;
font-weight: 700;
}
/* ── Model Picker ───────────────────────────────────────── */
.model-picker {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
overflow: hidden;
}
.picker-summary {
display: flex;
align-items: center;
gap: 0.6rem;
padding: 0.65rem 0.9rem;
cursor: pointer;
user-select: none;
list-style: none;
background: var(--color-surface-raised, #e4ebf5);
}
.picker-summary::-webkit-details-marker { display: none; }
.picker-summary::before { content: '▶ '; font-size: 0.65rem; color: var(--color-text-secondary, #6b7a99); }
details[open] .picker-summary::before { content: '▼ '; }
.picker-title {
font-size: 0.9rem;
font-weight: 600;
color: var(--color-text, #1a2338);
}
.picker-badge {
font-size: 0.75rem;
color: var(--color-text-secondary, #6b7a99);
background: var(--color-surface, #fff);
border: 1px solid var(--color-border, #d0d7e8);
padding: 0.15rem 0.5rem;
border-radius: 1rem;
font-family: var(--font-mono, monospace);
margin-left: auto;
}
.picker-body {
padding: 0.75rem;
border-top: 1px solid var(--color-border, #d0d7e8);
display: flex;
flex-direction: column;
gap: 0.75rem;
}
.picker-loading,
.picker-empty {
font-size: 0.85rem;
color: var(--color-text-secondary, #6b7a99);
padding: 0.5rem 0;
}
.picker-category {
display: flex;
flex-direction: column;
gap: 0.3rem;
}
.picker-cat-header {
display: flex;
align-items: center;
gap: 0.45rem;
font-size: 0.82rem;
font-weight: 700;
color: var(--color-text, #1a2338);
text-transform: uppercase;
letter-spacing: 0.04em;
cursor: pointer;
}
.picker-cat-count {
font-weight: 400;
color: var(--color-text-secondary, #6b7a99);
font-family: var(--font-mono, monospace);
font-size: 0.75rem;
text-transform: none;
letter-spacing: 0;
}
.picker-no-models {
font-size: 0.78rem;
color: var(--color-text-secondary, #6b7a99);
opacity: 0.65;
padding-left: 1.4rem;
font-style: italic;
}
.picker-model-list {
display: flex;
flex-wrap: wrap;
gap: 0.35rem 0.75rem;
padding-left: 1.4rem;
}
.picker-model-row {
display: flex;
align-items: center;
gap: 0.35rem;
font-size: 0.82rem;
cursor: pointer;
color: var(--color-text, #1a2338);
}
.picker-model-name {
font-family: var(--font-mono, monospace);
font-size: 0.78rem;
white-space: nowrap;
max-width: 18ch;
overflow: hidden;
text-overflow: ellipsis;
}
.picker-adapter-type {
font-size: 0.68rem;
color: var(--color-text-secondary, #6b7a99);
background: var(--color-surface-raised, #e4ebf5);
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.25rem;
padding: 0.05rem 0.3rem;
font-family: var(--font-mono, monospace);
}
@media (max-width: 600px) {
.picker-model-list { padding-left: 0; }
.picker-model-name { max-width: 14ch; }
}
/* ── Fine-tune section ──────────────────────────────────── */
.ft-section {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
overflow: hidden;
}
.ft-summary {
padding: 0.65rem 0.9rem;
cursor: pointer;
font-size: 0.9rem;
font-weight: 600;
color: var(--color-text, #1a2338);
user-select: none;
list-style: none;
background: var(--color-surface-raised, #e4ebf5);
}
.ft-summary::-webkit-details-marker { display: none; }
.ft-summary::before { content: '▶ '; font-size: 0.65rem; color: var(--color-text-secondary, #6b7a99); }
details[open] .ft-summary::before { content: '▼ '; }
.ft-body {
padding: 0.75rem;
display: flex;
flex-direction: column;
gap: 0.75rem;
border-top: 1px solid var(--color-border, #d0d7e8);
}
.ft-controls {
display: flex;
flex-wrap: wrap;
gap: 0.75rem;
align-items: flex-end;
}
.ft-field {
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.ft-field-label {
font-size: 0.75rem;
font-weight: 600;
color: var(--color-text-secondary, #6b7a99);
text-transform: uppercase;
letter-spacing: 0.04em;
}
.ft-select {
padding: 0.35rem 0.5rem;
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
background: var(--color-surface, #fff);
font-size: 0.85rem;
color: var(--color-text, #1a2338);
min-width: 220px;
}
.ft-select:disabled { opacity: 0.55; }
.ft-epochs {
width: 64px;
padding: 0.35rem 0.5rem;
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
background: var(--color-surface, #fff);
font-size: 0.85rem;
color: var(--color-text, #1a2338);
text-align: center;
}
.ft-epochs:disabled { opacity: 0.55; }
.ft-run-btn { align-self: flex-end; }
.ft-log { margin-top: 0; }
@media (max-width: 600px) {
.ft-controls { flex-direction: column; align-items: stretch; }
.ft-select { min-width: 0; width: 100%; }
}
</style>