diff --git a/web/src/views/BenchmarkView.vue b/web/src/views/BenchmarkView.vue index 53df30c..9159d55 100644 --- a/web/src/views/BenchmarkView.vue +++ b/web/src/views/BenchmarkView.vue @@ -17,6 +17,22 @@ + +
+ Trained: + + {{ m.name }} + + F1 {{ (m.val_macro_f1 * 100).toFixed(1) }}% + + +
+
@@ -124,6 +140,54 @@

Hover a cell for precision / recall / support. Color: 🟢 ≥ 0.7 · 🟡 0.4–0.7 · 🔴 < 0.4

+ + +
+ Fine-tune a model +
+
+ + + +
+ +
+
+ {{ ftRunning ? '⏳ Training…' : ftError ? '❌ Failed' : '✅ Done' }} +
+
+
{{ line }}
+
+

{{ ftError }}

+
+
+
@@ -146,6 +210,15 @@ const LABEL_META: Record = {
 }
 
 // ── Types ────────────────────────────────────────────────────────────────────
+/** Shape of one entry in the list loaded from `/api/finetune/status`; only `name` is required. */
+interface FineTunedModel {
+  name: string
+  base_model_id?: string
+  val_macro_f1?: number
+  timestamp?: string
+  sample_count?: number
+}
+
 interface PerLabel { f1: number; precision: number; recall: number; support: number }
 interface ModelResult {
   macro_f1: number
@@ -168,6 +241,15 @@ const runError = ref('')
 const includeSlow = ref(false)
 const logEl = ref(null)
 
+// Fine-tune state
+const fineTunedModels = ref<FineTunedModel[]>([])
+const ftModel = ref('deberta-small')
+const ftEpochs = ref(5)
+const ftRunning = ref(false)
+const ftLog = ref<string[]>([])
+const ftError = ref('')
+const ftLogEl = ref<HTMLElement | null>(null)
+
 // ── Derived ──────────────────────────────────────────────────────────────────
 const modelNames = computed(() => Object.keys(results.value?.models ?? {}))
 const modelCount = computed(() => modelNames.value.length)
@@ -272,7 +354,58 @@ function startBenchmark() {
   )
 }
 
-onMounted(loadResults)
+/** Loads the fine-tuned model list from `/api/finetune/status` into `fineTunedModels`. */
+async function loadFineTunedModels() {
+  const { data } = await useApiFetch('/api/finetune/status')
+  // A non-array payload leaves the current list untouched.
+  if (Array.isArray(data)) fineTunedModels.value = data
+}
+
+/**
+ * Starts a fine-tune run through `useApiSSE` for the selected model and epoch count.
+ *
+ * Does nothing while a run is already in flight. `progress` messages are appended
+ * to `ftLog` and the log element is scrolled to its end; an `error` message is kept
+ * in `ftError`. When the stream completes the model list is reloaded, and the
+ * benchmark is re-run only if no error was recorded.
+ */
+function startFinetune() {
+  if (ftRunning.value) return
+  ftRunning.value = true
+  ftLog.value = []
+  ftError.value = ''
+
+  const params = new URLSearchParams({ model: ftModel.value, epochs: String(ftEpochs.value) })
+  useApiSSE(
+    `/api/finetune/run?${params}`,
+    async (event) => {
+      if (event.type === 'progress' && typeof event.message === 'string') {
+        ftLog.value.push(event.message)
+        await nextTick()
+        ftLogEl.value?.scrollTo({ top: ftLogEl.value.scrollHeight, behavior: 'smooth' })
+      }
+      if (event.type === 'error' && typeof event.message === 'string') {
+        ftError.value = event.message
+      }
+    },
+    async () => {
+      ftRunning.value = false
+      await loadFineTunedModels()
+      // A run that recorded an error has nothing new to chart, so skip the benchmark.
+      if (!ftError.value) startBenchmark() // auto-trigger benchmark to refresh charts
+    },
+    () => {
+      // Presumably the transport-failure callback; keep a message an `error` event already stored.
+      ftRunning.value = false
+      if (!ftError.value) ftError.value = 'Connection lost'
+    },
+  )
+}
+
+onMounted(() => {
+  loadResults()
+  loadFineTunedModels()
+})