Compare commits

...

10 commits

Author SHA1 Message Date
09e334359f fix: pessimistic submit/undo, config null-safe, load config on mount
- sft.py GET /config: use `or {}` guard so `sft: ~` (null YAML) doesn't
  return None instead of the default empty config
- CorrectionsView: convert handleCorrect/Discard/Flag and handleUndo from
  optimistic to pessimistic — queue mutation only happens after server
  confirms; failures leave item in queue so user can retry cleanly
- SettingsView: call loadSftConfig() on mount so saved bench_results_dir
  is populated instead of always starting empty
2026-04-08 18:49:38 -07:00
353d0a47a0 feat: Corrections tab — router, sidebar, settings, SFT config endpoints
- Add /corrections route to Vue router (lazy-loaded CorrectionsView)
- Add Corrections nav item (✍️) to AppSidebar after Benchmark
- Add cf-orch Integration section to SettingsView with bench_results_dir
  field, run scanner, and per-run import table
- Add GET /api/sft/config and POST /api/sft/config endpoints to app/sft.py
2026-04-08 18:29:22 -07:00
e63d77127b feat: CorrectionsView and useSftKeyboard composable 2026-04-08 15:26:13 -07:00
03e5f9f9b4 fix: guard null failure_reason render, fix mid-quality test description
- Add v-if guard on failure-reason <p> so null renders no element (not literal "null")
- Clarify mid-quality test description: score is 0.4 to <0.7 (exclusive upper bound)
- Add test: renders nothing for failure_reason when null (+1 → 14 SftCard tests)
2026-04-08 15:23:19 -07:00
e16ea95dcc fix: guard aria-describedby from rendering undefined string 2026-04-08 15:22:12 -07:00
8873920b83 feat: SftCard — quality chip, prompt collapsible, action buttons, correction area slot 2026-04-08 15:19:37 -07:00
2d939b77f9 feat: SftCorrectionArea — inline correction text area component 2026-04-08 15:16:45 -07:00
137a9dbb8e fix: nullable failure_reason, factory fixture for sft store tests 2026-04-08 15:14:29 -07:00
9c11916d81 feat: useSftStore — SftQueueItem type and Pinia store 2026-04-08 15:11:17 -07:00
b6d45c746c fix: shared _is_exportable predicate, return type annotations on export/stats 2026-04-08 15:07:24 -07:00
12 changed files with 1276 additions and 22 deletions

View file

@ -80,6 +80,15 @@ def _write_candidates(records: list[dict]) -> None:
write_jsonl(_candidates_file(), records)
def _is_exportable(r: dict) -> bool:
"""Return True if an approved record is ready to include in SFT export."""
return (
r.get("status") == "approved"
and bool(r.get("corrected_response"))
and str(r["corrected_response"]).strip() != ""
)
# ── GET /runs ──────────────────────────────────────────────────────────────
@router.get("/runs")
@ -211,15 +220,9 @@ def post_undo(req: UndoRequest):
# ── GET /export ─────────────────────────────────────────────────────────────
@router.get("/export")
def get_export():
def get_export() -> StreamingResponse:
"""Stream approved records as SFT-ready JSONL for download."""
approved = read_jsonl(_approved_file())
exportable = [
r for r in approved
if r.get("status") == "approved"
and r.get("corrected_response")
and str(r["corrected_response"]).strip()
]
exportable = [r for r in read_jsonl(_approved_file()) if _is_exportable(r)]
def generate():
for r in exportable:
@ -243,7 +246,7 @@ def get_export():
# ── GET /stats ──────────────────────────────────────────────────────────────
@router.get("/stats")
def get_stats():
def get_stats() -> dict[str, object]:
"""Return counts by status, model, and task type."""
records = _read_candidates()
by_status: dict[str, int] = {}
@ -259,10 +262,7 @@ def get_stats():
by_task_type[task_type] = by_task_type.get(task_type, 0) + 1
approved = read_jsonl(_approved_file())
export_ready = sum(
1 for r in approved
if r.get("corrected_response") and str(r["corrected_response"]).strip()
)
export_ready = sum(1 for r in approved if _is_exportable(r))
return {
"total": len(records),
@ -271,3 +271,40 @@ def get_stats():
"by_task_type": by_task_type,
"export_ready": export_ready,
}
# ── GET /config ─────────────────────────────────────────────────────────────
@router.get("/config")
def get_sft_config() -> dict:
    """Return the current SFT configuration (bench_results_dir).

    Reads the YAML config file and returns ``{"bench_results_dir": <value>}``.
    The value falls back to an empty string when the file is missing, cannot
    be parsed as YAML, does not hold a mapping at its root, has no mapping
    under ``sft``, or stores a null/empty ``bench_results_dir``.
    """
    default = {"bench_results_dir": ""}
    f = _config_file()
    if not f.exists():
        return default
    try:
        raw = yaml.safe_load(f.read_text(encoding="utf-8"))
    except yaml.YAMLError:
        return default
    # safe_load yields None for an empty file and may yield a list or scalar
    # for a malformed one; only a mapping can carry an "sft" section.
    if not isinstance(raw, dict):
        return default
    sft_section = raw.get("sft")
    if not isinstance(sft_section, dict):
        return default
    # `bench_results_dir: ~` is stored as None; report it as "" like a missing key.
    return {"bench_results_dir": sft_section.get("bench_results_dir") or ""}
class SftConfigPayload(BaseModel):
    """Request body accepted by POST /config."""

    # Value the endpoint stores under sft.bench_results_dir in the config file.
    bench_results_dir: str
@router.post("/config")
def post_sft_config(payload: SftConfigPayload) -> dict:
    """Write the bench_results_dir setting to the config file.

    The existing YAML document is loaded first so that other top-level
    sections, and any other keys already stored under ``sft``, are kept.
    The updated document is written to a sibling ``.tmp`` file which is
    then moved over the real config file.

    Returns ``{"ok": True}`` once the file has been replaced.
    """
    f = _config_file()
    f.parent.mkdir(parents=True, exist_ok=True)
    try:
        raw = yaml.safe_load(f.read_text(encoding="utf-8")) if f.exists() else {}
    except yaml.YAMLError:
        raw = {}
    # An empty file loads as None and a malformed one may load as a list or
    # scalar; none of those accept an "sft" key, so start from a fresh mapping.
    if not isinstance(raw, dict):
        raw = {}
    sft_section = raw.get("sft")
    if not isinstance(sft_section, dict):
        sft_section = {}
    # Update in place rather than replacing the section, so sibling keys survive.
    sft_section["bench_results_dir"] = payload.bench_results_dir
    raw["sft"] = sft_section
    tmp = f.with_suffix(".tmp")
    tmp.write_text(yaml.dump(raw, allow_unicode=True, sort_keys=False), encoding="utf-8")
    # Path.replace overwrites an existing target on every platform, whereas
    # Path.rename raises on Windows when the config file already exists.
    tmp.replace(f)
    return {"ok": True}

View file

@ -65,8 +65,9 @@ const navItems = [
{ path: '/', icon: '🃏', label: 'Label' },
{ path: '/fetch', icon: '📥', label: 'Fetch' },
{ path: '/stats', icon: '📊', label: 'Stats' },
{ path: '/benchmark', icon: '🏁', label: 'Benchmark' },
{ path: '/settings', icon: '⚙️', label: 'Settings' },
{ path: '/benchmark', icon: '🏁', label: 'Benchmark' },
{ path: '/corrections', icon: '✍️', label: 'Corrections' },
{ path: '/settings', icon: '⚙️', label: 'Settings' },
]
const stowed = ref(localStorage.getItem(LS_KEY) === 'true')

View file

@ -0,0 +1,98 @@
import { mount } from '@vue/test-utils'
import SftCard from './SftCard.vue'
import type { SftQueueItem } from '../stores/sft'
import { describe, it, expect } from 'vitest'
// Baseline fixture: a needs_review candidate with quality_score 0.2 and a
// non-null failure_reason. The other fixtures are copies with a different score.
const LOW_QUALITY_ITEM: SftQueueItem = {
  id: 'abc', source: 'cf-orch-benchmark', benchmark_run_id: 'run1',
  timestamp: '2026-04-07T10:00:00Z', status: 'needs_review',
  prompt_messages: [
    { role: 'system', content: 'You are a coding assistant.' },
    { role: 'user', content: 'Write a Python add function.' },
  ],
  model_response: 'def add(a, b): return a - b',
  corrected_response: null, quality_score: 0.2,
  failure_reason: 'pattern_match: 0/2 matched',
  task_id: 'code-fn', task_type: 'code', task_name: 'Code: Write a function',
  model_id: 'Qwen/Qwen2.5-3B', model_name: 'Qwen2.5-3B',
  node_id: 'heimdall', gpu_id: 0, tokens_per_sec: 38.4,
}
// Score 0.55: used by the "0.4 to <0.7" chip-class test.
const MID_QUALITY_ITEM: SftQueueItem = { ...LOW_QUALITY_ITEM, id: 'mid', quality_score: 0.55 }
// Score 0.72: used by the ">= 0.7" chip-class test.
const HIGH_QUALITY_ITEM: SftQueueItem = { ...LOW_QUALITY_ITEM, id: 'hi', quality_score: 0.72 }
// Component tests for SftCard: rendered chips/text, quality-chip CSS class per
// score band, events emitted by the action buttons, and visibility of the
// correction area and failure-reason paragraph.
describe('SftCard', () => {
  // --- Rendered content ---
  it('renders model name chip', () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM } })
    expect(w.text()).toContain('Qwen2.5-3B')
  })
  it('renders task type chip', () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM } })
    expect(w.text()).toContain('code')
  })
  it('renders failure reason', () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM } })
    expect(w.text()).toContain('pattern_match: 0/2 matched')
  })
  it('renders model response', () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM } })
    expect(w.text()).toContain('def add(a, b): return a - b')
  })
  // --- Quality chip: formatted score and class per band ---
  it('quality chip shows numeric value for low quality', () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM } })
    expect(w.text()).toContain('0.20')
  })
  it('quality chip has low-quality class when score < 0.4', () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM } })
    expect(w.find('[data-testid="quality-chip"]').classes()).toContain('quality-low')
  })
  it('quality chip has mid-quality class when score is 0.4 to <0.7', () => {
    const w = mount(SftCard, { props: { item: MID_QUALITY_ITEM } })
    expect(w.find('[data-testid="quality-chip"]').classes()).toContain('quality-mid')
  })
  it('quality chip has acceptable class when score >= 0.7', () => {
    const w = mount(SftCard, { props: { item: HIGH_QUALITY_ITEM } })
    expect(w.find('[data-testid="quality-chip"]').classes()).toContain('quality-ok')
  })
  // --- Action buttons emit their events ---
  it('clicking Correct button emits correct', async () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM } })
    await w.find('[data-testid="correct-btn"]').trigger('click')
    expect(w.emitted('correct')).toBeTruthy()
  })
  it('clicking Discard button emits discard', async () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM } })
    await w.find('[data-testid="discard-btn"]').trigger('click')
    expect(w.emitted('discard')).toBeTruthy()
  })
  it('clicking Flag Model button emits flag', async () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM } })
    await w.find('[data-testid="flag-btn"]').trigger('click')
    expect(w.emitted('flag')).toBeTruthy()
  })
  // --- Correction area is controlled by the `correcting` prop ---
  it('correction area hidden initially', () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM } })
    expect(w.find('[data-testid="correction-area"]').exists()).toBe(false)
  })
  it('correction area shown when correcting prop is true', () => {
    const w = mount(SftCard, { props: { item: LOW_QUALITY_ITEM, correcting: true } })
    expect(w.find('[data-testid="correction-area"]').exists()).toBe(true)
  })
  // A null failure_reason must render no element at all (not the text "null").
  it('renders nothing for failure reason when null', () => {
    const item = { ...LOW_QUALITY_ITEM, failure_reason: null }
    const w = mount(SftCard, { props: { item } })
    expect(w.find('.failure-reason').exists()).toBe(false)
  })
})

View file

@ -0,0 +1,246 @@
<template>
  <article class="sft-card">
    <!-- Chips row: model, task type, node/GPU, throughput, quality score -->
    <div class="chips-row">
      <span class="chip chip-model">{{ item.model_name }}</span>
      <span class="chip chip-task">{{ item.task_type }}</span>
      <span class="chip chip-node">{{ item.node_id }} · GPU {{ item.gpu_id }}</span>
      <span class="chip chip-speed">{{ item.tokens_per_sec.toFixed(1) }} tok/s</span>
      <span
        class="chip quality-chip"
        :class="qualityClass"
        data-testid="quality-chip"
        :title="qualityLabel"
      >
        {{ item.quality_score.toFixed(2) }} · {{ qualityLabel }}
      </span>
    </div>
    <!-- Failure reason: omitted entirely when null/empty. The id is the target
         of the correction textarea's aria-describedby further down. -->
    <p
      v-if="item.failure_reason"
      :id="'sft-failure-' + item.id"
      class="failure-reason"
    >{{ item.failure_reason }}</p>
    <!-- Prompt (collapsible) -->
    <div class="prompt-section">
      <button
        class="prompt-toggle"
        :aria-expanded="promptExpanded"
        @click="promptExpanded = !promptExpanded"
      >
        {{ promptExpanded ? 'Hide prompt ↑' : 'Show full prompt ↓' }}
      </button>
      <div v-if="promptExpanded" class="prompt-messages">
        <div
          v-for="(msg, i) in item.prompt_messages"
          :key="i"
          class="prompt-message"
          :class="`role-${msg.role}`"
        >
          <span class="role-label">{{ msg.role }}</span>
          <pre class="message-content">{{ msg.content }}</pre>
        </div>
      </div>
    </div>
    <!-- Model response -->
    <div class="model-response-section">
      <p class="section-label">Model output (incorrect)</p>
      <pre class="model-response">{{ item.model_response }}</pre>
    </div>
    <!-- Action bar: each button only emits; the parent decides what happens -->
    <div class="action-bar">
      <button
        data-testid="correct-btn"
        class="btn-correct"
        @click="$emit('correct')"
      > Correct</button>
      <button
        data-testid="discard-btn"
        class="btn-discard"
        @click="$emit('discard')"
      > Discard</button>
      <button
        data-testid="flag-btn"
        class="btn-flag"
        @click="$emit('flag')"
      > Flag Model</button>
    </div>
    <!-- Correction area (shown when correcting = true). described-by is only
         passed when the failure-reason paragraph is actually rendered, so
         aria-describedby never references a missing element. -->
    <div v-if="correcting" data-testid="correction-area">
      <SftCorrectionArea
        ref="correctionAreaEl"
        :described-by="item.failure_reason ? 'sft-failure-' + item.id : undefined"
        @submit="$emit('submit-correction', $event)"
        @cancel="$emit('cancel-correction')"
      />
    </div>
  </article>
</template>
<script setup lang="ts">
import { ref, computed } from 'vue'
import type { SftQueueItem } from '../stores/sft'
import SftCorrectionArea from './SftCorrectionArea.vue'

const props = defineProps<{ item: SftQueueItem; correcting?: boolean }>()
const emit = defineEmits<{
  correct: []
  discard: []
  flag: []
  'submit-correction': [text: string]
  'cancel-correction': []
}>()

// Whether the prompt messages are currently expanded.
const promptExpanded = ref(false)
// Handle on the correction area child, used by resetCorrection().
const correctionAreaEl = ref<InstanceType<typeof SftCorrectionArea> | null>(null)

// Score bands in ascending order: the first band whose exclusive upper bound
// is above the score applies; anything else lands in the top band.
const QUALITY_BANDS = [
  { below: 0.4, cssClass: 'quality-low', label: 'low quality' },
  { below: 0.7, cssClass: 'quality-mid', label: 'fair' },
] as const
const TOP_BAND = { cssClass: 'quality-ok', label: 'acceptable' } as const

const qualityBand = computed(
  () => QUALITY_BANDS.find(band => props.item.quality_score < band.below) ?? TOP_BAND,
)
// CSS class applied to the quality chip.
const qualityClass = computed(() => qualityBand.value.cssClass)
// Human-readable label shown in the chip and its tooltip.
const qualityLabel = computed(() => qualityBand.value.label)

// Clears the correction text, if the correction area is mounted.
function resetCorrection() {
  correctionAreaEl.value?.reset()
}

defineExpose({ resetCorrection })
</script>
<style scoped>
.sft-card {
background: var(--color-surface-raised);
border: 1px solid var(--color-border);
border-radius: var(--radius-lg);
padding: var(--space-4);
display: flex;
flex-direction: column;
gap: var(--space-3);
}
.chips-row {
display: flex;
flex-wrap: wrap;
gap: var(--space-2);
}
.chip {
padding: var(--space-1) var(--space-2);
border-radius: var(--radius-full);
font-size: 0.78rem;
font-weight: 600;
white-space: nowrap;
}
.chip-model { background: var(--color-primary-light, #e8f2e7); color: var(--color-primary); }
.chip-task { background: var(--color-surface-alt); color: var(--color-text-muted); }
.chip-node { background: var(--color-surface-alt); color: var(--color-text-muted); }
.chip-speed { background: var(--color-surface-alt); color: var(--color-text-muted); }
.quality-chip { color: #fff; }
.quality-low { background: var(--color-error, #c0392b); }
.quality-mid { background: var(--color-warning, #d4891a); }
.quality-ok { background: var(--color-success, #3a7a32); }
.failure-reason {
font-size: 0.82rem;
color: var(--color-text-muted);
font-style: italic;
}
.prompt-toggle {
background: none;
border: none;
color: var(--color-accent);
font-size: 0.85rem;
cursor: pointer;
padding: 0;
text-decoration: underline;
}
.prompt-messages {
margin-top: var(--space-2);
display: flex;
flex-direction: column;
gap: var(--space-2);
}
.prompt-message {
display: flex;
flex-direction: column;
gap: var(--space-1);
}
.role-label {
font-size: 0.75rem;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--color-text-muted);
}
.message-content {
font-family: var(--font-mono);
font-size: 0.82rem;
white-space: pre-wrap;
background: var(--color-surface-alt);
padding: var(--space-2) var(--space-3);
border-radius: var(--radius-md);
max-height: 200px;
overflow-y: auto;
}
.section-label {
font-size: 0.82rem;
font-weight: 600;
color: var(--color-text-muted);
margin-bottom: var(--space-1);
}
.model-response {
font-family: var(--font-mono);
font-size: 0.88rem;
white-space: pre-wrap;
background: color-mix(in srgb, var(--color-error, #c0392b) 8%, var(--color-surface-alt));
border-left: 3px solid var(--color-error, #c0392b);
padding: var(--space-3);
border-radius: var(--radius-md);
max-height: 300px;
overflow-y: auto;
}
.action-bar {
display: flex;
gap: var(--space-3);
flex-wrap: wrap;
}
.action-bar button {
padding: var(--space-2) var(--space-4);
border-radius: var(--radius-md);
border: 1px solid var(--color-border);
font-size: 0.9rem;
cursor: pointer;
background: var(--color-surface-raised);
color: var(--color-text);
}
.btn-correct { border-color: var(--color-success); color: var(--color-success); }
.btn-correct:hover { background: color-mix(in srgb, var(--color-success) 10%, transparent); }
.btn-discard { border-color: var(--color-error); color: var(--color-error); }
.btn-discard:hover { background: color-mix(in srgb, var(--color-error) 10%, transparent); }
.btn-flag { border-color: var(--color-warning); color: var(--color-warning); }
.btn-flag:hover { background: color-mix(in srgb, var(--color-warning) 10%, transparent); }
</style>

View file

@ -0,0 +1,68 @@
import { mount } from '@vue/test-utils'
import SftCorrectionArea from './SftCorrectionArea.vue'
import { describe, it, expect } from 'vitest'
// Component tests for SftCorrectionArea: submit-button enablement, the
// submit/cancel events (click and keyboard), and the aria-describedby guard.
describe('SftCorrectionArea', () => {
  it('renders a textarea', () => {
    const w = mount(SftCorrectionArea)
    expect(w.find('textarea').exists()).toBe(true)
  })
  // --- Submit button enablement ---
  it('submit button is disabled when textarea is empty', () => {
    const w = mount(SftCorrectionArea)
    const btn = w.find('[data-testid="submit-btn"]')
    expect((btn.element as HTMLButtonElement).disabled).toBe(true)
  })
  it('submit button is disabled when textarea is whitespace only', async () => {
    const w = mount(SftCorrectionArea)
    await w.find('textarea').setValue(' ')
    const btn = w.find('[data-testid="submit-btn"]')
    expect((btn.element as HTMLButtonElement).disabled).toBe(true)
  })
  it('submit button is enabled when textarea has content', async () => {
    const w = mount(SftCorrectionArea)
    await w.find('textarea').setValue('def add(a, b): return a + b')
    const btn = w.find('[data-testid="submit-btn"]')
    expect((btn.element as HTMLButtonElement).disabled).toBe(false)
  })
  // --- Click-driven events ---
  it('clicking submit emits submit with trimmed text', async () => {
    const w = mount(SftCorrectionArea)
    await w.find('textarea').setValue(' def add(a, b): return a + b ')
    await w.find('[data-testid="submit-btn"]').trigger('click')
    expect(w.emitted('submit')?.[0]).toEqual(['def add(a, b): return a + b'])
  })
  it('clicking cancel emits cancel', async () => {
    const w = mount(SftCorrectionArea)
    await w.find('[data-testid="cancel-btn"]').trigger('click')
    expect(w.emitted('cancel')).toBeTruthy()
  })
  // --- Keyboard-driven events on the textarea ---
  it('Escape key emits cancel', async () => {
    const w = mount(SftCorrectionArea)
    await w.find('textarea').trigger('keydown', { key: 'Escape' })
    expect(w.emitted('cancel')).toBeTruthy()
  })
  it('Ctrl+Enter emits submit when text is non-empty', async () => {
    const w = mount(SftCorrectionArea)
    await w.find('textarea').setValue('correct answer')
    await w.find('textarea').trigger('keydown', { key: 'Enter', ctrlKey: true })
    expect(w.emitted('submit')?.[0]).toEqual(['correct answer'])
  })
  it('Ctrl+Enter does not emit submit when text is empty', async () => {
    const w = mount(SftCorrectionArea)
    await w.find('textarea').trigger('keydown', { key: 'Enter', ctrlKey: true })
    expect(w.emitted('submit')).toBeFalsy()
  })
  // The attribute must be absent, not rendered as the string "undefined".
  it('omits aria-describedby when describedBy prop is not provided', () => {
    const w = mount(SftCorrectionArea)
    const textarea = w.find('textarea')
    expect(textarea.attributes('aria-describedby')).toBeUndefined()
  })
})

View file

@ -0,0 +1,130 @@
<template>
  <!-- Inline editor for a corrected response: a textarea plus submit/cancel buttons -->
  <div class="correction-area">
    <label class="correction-label" for="correction-textarea">
      Write the corrected response:
    </label>
    <!-- Escape cancels; Ctrl+Enter or Cmd+Enter submits when the text is valid.
         aria-describedby is dropped when no describedBy prop is supplied. -->
    <textarea
      id="correction-textarea"
      ref="textareaEl"
      v-model="text"
      class="correction-textarea"
      aria-label="Write corrected response"
      aria-required="true"
      :aria-describedby="describedBy || undefined"
      placeholder="Write the response this model should have given..."
      rows="4"
      @keydown.escape="$emit('cancel')"
      @keydown.enter.ctrl.prevent="submitIfValid"
      @keydown.enter.meta.prevent="submitIfValid"
    />
    <div class="correction-actions">
      <!-- Disabled until the text contains something other than whitespace -->
      <button
        data-testid="submit-btn"
        class="btn-submit"
        :disabled="!isValid"
        @click="submitIfValid"
      >
        Submit correction
      </button>
      <button data-testid="cancel-btn" class="btn-cancel" @click="$emit('cancel')">
        Cancel
      </button>
    </div>
  </div>
</template>
<script setup lang="ts">
import { ref, computed, onMounted } from 'vue'
// describedBy: optional id forwarded to the textarea's aria-describedby.
const props = withDefaults(defineProps<{ describedBy?: string }>(), { describedBy: undefined })
// submit carries the trimmed text; cancel carries nothing.
const emit = defineEmits<{ submit: [text: string]; cancel: [] }>()
// Current textarea content (bound with v-model).
const text = ref('')
const textareaEl = ref<HTMLTextAreaElement | null>(null)
// Valid means: at least one non-whitespace character.
const isValid = computed(() => text.value.trim().length > 0)
// Move focus into the textarea as soon as the area appears.
onMounted(() => textareaEl.value?.focus())
// Emits `submit` with the trimmed text; does nothing while the text is blank.
function submitIfValid() {
  if (isValid.value) emit('submit', text.value.trim())
}
// Empties the textarea; exposed so a parent can clear it through a ref.
function reset() {
  text.value = ''
}
defineExpose({ reset })
</script>
<style scoped>
.correction-area {
display: flex;
flex-direction: column;
gap: var(--space-3);
padding: var(--space-4);
border-top: 1px solid var(--color-border);
background: var(--color-surface-alt, var(--color-surface));
border-radius: 0 0 var(--radius-lg) var(--radius-lg);
}
.correction-label {
font-size: 0.85rem;
font-weight: 600;
color: var(--color-text-muted);
}
.correction-textarea {
width: 100%;
min-height: 7rem;
padding: var(--space-3);
border: 1px solid var(--color-border);
border-radius: var(--radius-md);
background: var(--color-surface-raised);
color: var(--color-text);
font-family: var(--font-mono);
font-size: 0.88rem;
line-height: 1.5;
resize: vertical;
}
.correction-textarea:focus {
outline: 2px solid var(--color-primary);
outline-offset: 1px;
}
.correction-actions {
display: flex;
gap: var(--space-3);
align-items: center;
}
.btn-submit {
padding: var(--space-2) var(--space-4);
background: var(--color-primary);
color: var(--color-text-inverse, #fff);
border: none;
border-radius: var(--radius-md);
font-size: 0.9rem;
cursor: pointer;
}
.btn-submit:disabled {
opacity: 0.45;
cursor: not-allowed;
}
.btn-submit:not(:disabled):hover {
background: var(--color-primary-hover, var(--color-primary));
}
.btn-cancel {
background: none;
border: none;
color: var(--color-text-muted);
font-size: 0.9rem;
cursor: pointer;
text-decoration: underline;
padding: 0;
}
</style>

View file

@ -0,0 +1,42 @@
// src/composables/useSftKeyboard.ts
import { onUnmounted, getCurrentInstance } from 'vue'
/** Callbacks and state probe consumed by useSftKeyboard. */
interface Options {
  onCorrect: () => void // called for the "c" key
  onDiscard: () => void // called for the "d" key
  onFlag: () => void // called for the "f" key
  onEscape: () => void // called for the Escape key
  onSubmit: () => void // accepted but never invoked by the handler in this file
  isEditing: () => boolean // returns true when correction area is open
}
/**
 * Registers window-level keyboard shortcuts for the corrections queue:
 * "c" → onCorrect, "d" → onDiscard, "f" → onFlag, Escape → onEscape.
 * While `opts.isEditing()` is true only Escape is handled.
 *
 * The listener is removed automatically on unmount when called inside a
 * component; otherwise call the returned `cleanup` yourself.
 */
export function useSftKeyboard(opts: Options) {
  function handler(e: KeyboardEvent) {
    // Leave browser/OS chords alone: Ctrl+C (copy), Ctrl+F (find), Cmd+D
    // (bookmark) and friends must never discard or flag an item.
    if (e.ctrlKey || e.metaKey || e.altKey) return
    // A held key auto-repeats; act once per physical key press.
    if (e.repeat) return

    // Never intercept keys typed into a form control or editable region
    // (the correction textarea handles Escape and Ctrl+Enter itself).
    const target = e.target
    if (
      target instanceof HTMLInputElement ||
      target instanceof HTMLTextAreaElement ||
      target instanceof HTMLSelectElement ||
      (target instanceof HTMLElement && target.isContentEditable)
    ) {
      return
    }

    const k = e.key.toLowerCase()

    // When the correction area is open, only Escape is handled here.
    if (opts.isEditing()) {
      if (k === 'escape') opts.onEscape()
      return
    }

    if (k === 'c') { opts.onCorrect(); return }
    if (k === 'd') { opts.onDiscard(); return }
    if (k === 'f') { opts.onFlag(); return }
    if (k === 'escape') { opts.onEscape(); return }
  }

  window.addEventListener('keydown', handler)
  const cleanup = () => window.removeEventListener('keydown', handler)

  // onUnmounted may only be registered from inside a component's setup.
  if (getCurrentInstance()) {
    onUnmounted(cleanup)
  }
  return { cleanup }
}

View file

@ -5,15 +5,17 @@ import LabelView from '../views/LabelView.vue'
const FetchView = () => import('../views/FetchView.vue')
const StatsView = () => import('../views/StatsView.vue')
const BenchmarkView = () => import('../views/BenchmarkView.vue')
const SettingsView = () => import('../views/SettingsView.vue')
const SettingsView = () => import('../views/SettingsView.vue')
const CorrectionsView = () => import('../views/CorrectionsView.vue')
export const router = createRouter({
history: createWebHashHistory(),
routes: [
{ path: '/', component: LabelView, meta: { title: 'Label' } },
{ path: '/fetch', component: FetchView, meta: { title: 'Fetch' } },
{ path: '/stats', component: StatsView, meta: { title: 'Stats' } },
{ path: '/benchmark', component: BenchmarkView, meta: { title: 'Benchmark' } },
{ path: '/settings', component: SettingsView, meta: { title: 'Settings' } },
{ path: '/', component: LabelView, meta: { title: 'Label' } },
{ path: '/fetch', component: FetchView, meta: { title: 'Fetch' } },
{ path: '/stats', component: StatsView, meta: { title: 'Stats' } },
{ path: '/benchmark', component: BenchmarkView, meta: { title: 'Benchmark' } },
{ path: '/corrections', component: CorrectionsView, meta: { title: 'Corrections' } },
{ path: '/settings', component: SettingsView, meta: { title: 'Settings' } },
],
})

View file

@ -0,0 +1,78 @@
import { setActivePinia, createPinia } from 'pinia'
import { useSftStore } from './sft'
import type { SftQueueItem } from './sft'
import { beforeEach, describe, it, expect } from 'vitest'
// Factory for a complete SftQueueItem (id 'abc' by default). Fields passed in
// `overrides` replace the defaults, so each test states only what it cares about.
function makeMockItem(overrides: Partial<SftQueueItem> = {}): SftQueueItem {
  return {
    id: 'abc',
    source: 'cf-orch-benchmark',
    benchmark_run_id: 'run1',
    timestamp: '2026-04-07T10:00:00Z',
    status: 'needs_review',
    prompt_messages: [
      { role: 'system', content: 'You are a coding assistant.' },
      { role: 'user', content: 'Write a Python add function.' },
    ],
    model_response: 'def add(a, b): return a - b',
    corrected_response: null,
    quality_score: 0.2,
    failure_reason: 'pattern_match: 0/2 matched',
    task_id: 'code-fn',
    task_type: 'code',
    task_name: 'Code: Write a Python function',
    model_id: 'Qwen/Qwen2.5-3B',
    model_name: 'Qwen2.5-3B',
    node_id: 'heimdall',
    gpu_id: 0,
    tokens_per_sec: 38.4,
    // Spread last so overrides win over the defaults above.
    ...overrides,
  }
}
// Unit tests for the SFT Pinia store: initial state, the `current` getter,
// queue mutation helpers, and last-action bookkeeping.
describe('useSftStore', () => {
  // Fresh Pinia per test so store state never leaks between cases.
  beforeEach(() => setActivePinia(createPinia()))
  it('starts with empty queue', () => {
    const store = useSftStore()
    expect(store.queue).toEqual([])
    expect(store.current).toBeNull()
  })
  it('current returns first item', () => {
    const store = useSftStore()
    store.queue = [makeMockItem()]
    expect(store.current?.id).toBe('abc')
  })
  it('removeCurrentFromQueue removes first item', () => {
    const store = useSftStore()
    const second = makeMockItem({ id: 'def' })
    store.queue = [makeMockItem(), second]
    store.removeCurrentFromQueue()
    expect(store.queue[0].id).toBe('def')
  })
  it('restoreItem adds to front of queue', () => {
    const store = useSftStore()
    const second = makeMockItem({ id: 'def' })
    store.queue = [second]
    store.restoreItem(makeMockItem())
    expect(store.queue[0].id).toBe('abc')
    expect(store.queue[1].id).toBe('def')
  })
  it('setLastAction records the action', () => {
    const store = useSftStore()
    store.setLastAction('discard', makeMockItem())
    expect(store.lastAction?.type).toBe('discard')
    expect(store.lastAction?.item.id).toBe('abc')
  })
  it('clearLastAction nulls lastAction', () => {
    const store = useSftStore()
    store.setLastAction('flag', makeMockItem())
    store.clearLastAction()
    expect(store.lastAction).toBeNull()
  })
})

58
web/src/stores/sft.ts Normal file
View file

@ -0,0 +1,58 @@
// src/stores/sft.ts
import { defineStore } from 'pinia'
import { computed, ref } from 'vue'
/**
 * One entry of the SFT review queue held by useSftStore.
 * `corrected_response` and `failure_reason` may be null.
 */
export interface SftQueueItem {
  id: string
  source: 'cf-orch-benchmark'
  benchmark_run_id: string
  timestamp: string
  status: 'needs_review' | 'approved' | 'discarded' | 'model_rejected'
  prompt_messages: { role: string; content: string }[]
  model_response: string
  corrected_response: string | null
  quality_score: number // 0.0 to 1.0
  failure_reason: string | null
  task_id: string
  task_type: string
  task_name: string
  model_id: string
  model_name: string
  node_id: string
  gpu_id: number
  tokens_per_sec: number
}
/** Record of the most recent queue action: which action it was and the item it was applied to. */
export interface SftLastAction {
  type: 'correct' | 'discard' | 'flag'
  item: SftQueueItem
}
/**
 * Pinia store for the SFT corrections queue.
 * Holds the loaded queue, the remaining-items counter and the last action
 * taken; `current` exposes the item at the head of the queue (or null).
 */
export const useSftStore = defineStore('sft', () => {
  const queue = ref<SftQueueItem[]>([])
  const totalRemaining = ref(0)
  const lastAction = ref<SftLastAction | null>(null)

  // Head of the queue, or null when the queue is empty.
  const current = computed(() => {
    const head = queue.value[0]
    return head ?? null
  })

  // Drops the head of the queue in place.
  const removeCurrentFromQueue = () => {
    queue.value.shift()
  }

  // Remembers which action was applied to which item.
  const setLastAction = (type: SftLastAction['type'], item: SftQueueItem) => {
    lastAction.value = { type, item }
  }

  const clearLastAction = () => {
    lastAction.value = null
  }

  // Puts an item back at the head of the queue.
  const restoreItem = (item: SftQueueItem) => {
    queue.value.unshift(item)
  }

  return {
    queue,
    totalRemaining,
    lastAction,
    current,
    removeCurrentFromQueue,
    setLastAction,
    clearLastAction,
    restoreItem,
  }
})

View file

@ -0,0 +1,319 @@
<template>
  <div class="corrections-view">
    <!-- Header: remaining-items counter and an Undo shortcut -->
    <header class="cv-header">
      <span class="queue-count">
        <template v-if="loading">Loading</template>
        <template v-else-if="store.totalRemaining > 0">
          {{ store.totalRemaining }} remaining
        </template>
        <span v-else class="queue-empty-label">All caught up</span>
      </span>
      <div class="header-actions">
        <button @click="handleUndo" :disabled="!store.lastAction" class="btn-action"> Undo</button>
      </div>
    </header>
    <!-- States: loading → error → empty → card. role="status" gives the
         skeleton a role, so its aria-label is actually exposed. -->
    <div v-if="loading" class="skeleton-card" role="status" aria-label="Loading candidates" />
    <div v-else-if="apiError" class="error-display" role="alert">
      <p>Couldn't reach Avocet API.</p>
      <button @click="fetchBatch" class="btn-action">Retry</button>
    </div>
    <div v-else-if="!store.current" class="empty-state">
      <p>No candidates need review.</p>
      <p class="empty-hint">Import a benchmark run from the Settings tab to get started.</p>
    </div>
    <template v-else>
      <div class="card-wrapper">
        <SftCard
          :item="store.current"
          :correcting="correcting"
          @correct="startCorrection"
          @discard="handleDiscard"
          @flag="handleFlag"
          @submit-correction="handleCorrect"
          @cancel-correction="correcting = false"
        />
      </div>
    </template>
    <!-- Stats footer: per-status counts and, when anything is exportable, a download link -->
    <footer v-if="stats" class="stats-footer">
      <span class="stat"> {{ stats.by_status?.approved ?? 0 }} approved</span>
      <span class="stat"> {{ stats.by_status?.discarded ?? 0 }} discarded</span>
      <span class="stat"> {{ stats.by_status?.model_rejected ?? 0 }} flagged</span>
      <a
        v-if="(stats.export_ready ?? 0) > 0"
        :href="exportUrl"
        download
        class="btn-export"
      >
        Export {{ stats.export_ready }} corrections
      </a>
    </footer>
    <!-- Undo toast, rendered inline because UndoToast.vue uses the label
         store's LastAction shape, not SFT's. role="status" announces it
         politely; the dismiss button carries an explicit accessible name. -->
    <div v-if="store.lastAction" class="undo-toast" role="status">
      <span>Last: {{ store.lastAction.type }}</span>
      <button @click="handleUndo" class="btn-undo"> Undo</button>
      <button @click="store.clearLastAction()" class="btn-dismiss" aria-label="Dismiss"></button>
    </div>
  </div>
</template>
<script setup lang="ts">
import { ref, onMounted } from 'vue'
import { useSftStore } from '../stores/sft'
import { useSftKeyboard } from '../composables/useSftKeyboard'
import SftCard from '../components/SftCard.vue'
const store = useSftStore()
const loading = ref(false)
const apiError = ref(false)
const correcting = ref(false)
const stats = ref<Record<string, any> | null>(null)
const exportUrl = '/api/sft/export'
useSftKeyboard({
onCorrect: () => { if (store.current && !correcting.value) correcting.value = true },
onDiscard: () => { if (store.current && !correcting.value) handleDiscard() },
onFlag: () => { if (store.current && !correcting.value) handleFlag() },
onEscape: () => { correcting.value = false },
onSubmit: () => {},
isEditing: () => correcting.value,
})
async function fetchBatch() {
loading.value = true
apiError.value = false
try {
const res = await fetch('/api/sft/queue?per_page=20')
if (!res.ok) throw new Error('API error')
const data = await res.json()
store.queue = data.items
store.totalRemaining = data.total
} catch {
apiError.value = true
} finally {
loading.value = false
}
}
async function fetchStats() {
try {
const res = await fetch('/api/sft/stats')
if (res.ok) stats.value = await res.json()
} catch { /* ignore */ }
}
function startCorrection() {
correcting.value = true
}
async function handleCorrect(text: string) {
if (!store.current) return
const item = store.current
correcting.value = false
try {
const res = await fetch('/api/sft/submit', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ id: item.id, action: 'correct', corrected_response: text }),
})
if (!res.ok) throw new Error(`HTTP ${res.status}`)
store.removeCurrentFromQueue()
store.setLastAction('correct', item)
store.totalRemaining = Math.max(0, store.totalRemaining - 1)
fetchStats()
if (store.queue.length < 5) fetchBatch()
} catch (err) {
console.error('handleCorrect failed:', err)
}
}
async function handleDiscard() {
if (!store.current) return
const item = store.current
try {
const res = await fetch('/api/sft/submit', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ id: item.id, action: 'discard' }),
})
if (!res.ok) throw new Error(`HTTP ${res.status}`)
store.removeCurrentFromQueue()
store.setLastAction('discard', item)
store.totalRemaining = Math.max(0, store.totalRemaining - 1)
fetchStats()
if (store.queue.length < 5) fetchBatch()
} catch (err) {
console.error('handleDiscard failed:', err)
}
}
async function handleFlag() {
if (!store.current) return
const item = store.current
try {
const res = await fetch('/api/sft/submit', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ id: item.id, action: 'flag' }),
})
if (!res.ok) throw new Error(`HTTP ${res.status}`)
store.removeCurrentFromQueue()
store.setLastAction('flag', item)
store.totalRemaining = Math.max(0, store.totalRemaining - 1)
fetchStats()
if (store.queue.length < 5) fetchBatch()
} catch (err) {
console.error('handleFlag failed:', err)
}
}
/**
 * Undo the most recent review action.
 *
 * Sends POST /api/sft/undo for the item recorded in `store.lastAction`. Only
 * after a successful response is the item put back into the queue, the
 * remaining count incremented, the undo record cleared and the stats
 * refreshed. On any failure the queue is left untouched and the undo record
 * is cleared.
 */
async function handleUndo() {
  const pending = store.lastAction
  if (!pending) return
  const undoneItem = pending.item
  try {
    const response = await fetch('/api/sft/undo', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ id: undoneItem.id }),
    })
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`)
    }
    store.restoreItem(undoneItem)
    store.totalRemaining += 1
    store.clearLastAction()
    fetchStats()
  } catch (err) {
    // Backend did not restore — clear the undo UI without restoring queue state.
    console.error('handleUndo failed:', err)
    store.clearLastAction()
  }
}
// On mount, start loading the first batch of queue items and the stats.
// Both calls are fire-and-forget; neither result is awaited here.
onMounted(() => {
  void fetchBatch()
  void fetchStats()
})
</script>
<style scoped>
/* Page container: a centered single column, at least one dynamic viewport tall, capped at 760px wide. */
.corrections-view {
display: flex;
flex-direction: column;
min-height: 100dvh;
padding: var(--space-4);
gap: var(--space-4);
max-width: 760px;
margin: 0 auto;
}
/* Header row: children pushed to opposite ends and vertically centered. */
.cv-header {
display: flex;
justify-content: space-between;
align-items: center;
}
/* Queue counter text. */
.queue-count {
font-size: 1rem;
font-weight: 600;
color: var(--color-text);
}
/* Muted text colour for the empty-queue label. */
.queue-empty-label { color: var(--color-text-muted); }
/* Bordered action button on a raised surface. */
.btn-action {
padding: var(--space-2) var(--space-3);
border: 1px solid var(--color-border);
border-radius: var(--radius-md);
background: var(--color-surface-raised);
cursor: pointer;
font-size: 0.88rem;
}
/* Disabled state: dimmed, with a not-allowed cursor. */
.btn-action:disabled { opacity: 0.4; cursor: not-allowed; }
/* Fixed-height placeholder block that pulses its opacity in a 1.5s loop. */
.skeleton-card {
height: 320px;
background: var(--color-surface-alt);
border-radius: var(--radius-lg);
animation: pulse 1.5s ease-in-out infinite;
}
/* Opacity fades to 50% at the midpoint and back to 100%. */
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
/* Turn the pulse off for users who request reduced motion. */
@media (prefers-reduced-motion: reduce) {
.skeleton-card { animation: none; }
}
/* Error and empty-queue panels share the same centered, muted, generously padded layout. */
.error-display, .empty-state {
text-align: center;
padding: var(--space-12);
color: var(--color-text-muted);
}
/* Smaller secondary line with a small gap above it. */
.empty-hint { font-size: 0.88rem; margin-top: var(--space-2); }
/* Footer row of stats: wrapping flex row separated from the content above by a light top border. */
.stats-footer {
display: flex;
gap: var(--space-4);
align-items: center;
flex-wrap: wrap;
padding: var(--space-3) 0;
border-top: 1px solid var(--color-border-light);
font-size: 0.85rem;
color: var(--color-text-muted);
}
/* Export control styled as a primary button; margin-left:auto pushes it to the row's far end. */
.btn-export {
margin-left: auto;
padding: var(--space-2) var(--space-3);
background: var(--color-primary);
color: var(--color-text-inverse, #fff);
border-radius: var(--radius-md);
text-decoration: none;
font-size: 0.88rem;
}
/* Undo toast: fixed near the bottom of the viewport and horizontally centered
   (left: 50% combined with translateX(-50%)). */
.undo-toast {
position: fixed;
bottom: var(--space-6);
left: 50%;
transform: translateX(-50%);
display: flex;
align-items: center;
gap: var(--space-3);
background: var(--color-surface-raised);
border: 1px solid var(--color-border);
border-radius: var(--radius-md);
padding: var(--space-3) var(--space-4);
box-shadow: 0 4px 12px rgba(0,0,0,0.15);
font-size: 0.9rem;
}
/* Primary-coloured, borderless undo button. */
.btn-undo {
background: var(--color-primary);
color: var(--color-text-inverse, #fff);
border: none;
border-radius: var(--radius-sm);
padding: var(--space-1) var(--space-3);
cursor: pointer;
font-size: 0.88rem;
}
/* Dismiss control: no background or border, muted colour. */
.btn-dismiss {
background: none;
border: none;
color: var(--color-text-muted);
cursor: pointer;
font-size: 1rem;
}
</style>

View file

@ -110,6 +110,63 @@
</label>
</section>
<!-- cf-orch SFT Integration section -->
<section class="section">
<h2 class="section-title">cf-orch Integration</h2>
<p class="section-desc">
Import SFT (supervised fine-tuning) candidates from cf-orch benchmark runs.
</p>
<!-- Directory path field, two-way bound to benchResultsDir -->
<div class="field-row">
<label class="field field-grow">
<span>bench_results_dir</span>
<input
id="bench-results-dir"
v-model="benchResultsDir"
type="text"
placeholder="/path/to/circuitforge-orch/scripts/bench_results"
/>
</label>
</div>
<!-- Save calls saveSftConfig, Scan calls scanRuns; the status text is shown only while saveStatus is non-empty -->
<div class="account-actions">
<button class="btn-primary" @click="saveSftConfig">Save</button>
<button class="btn-secondary" @click="scanRuns">Scan for runs</button>
<span v-if="saveStatus" class="save-status">{{ saveStatus }}</span>
</div>
<!-- Runs table: rendered only when at least one run is in `runs` -->
<table v-if="runs.length > 0" class="runs-table">
<thead>
<tr>
<th>Timestamp</th>
<th>Candidates</th>
<th>Imported</th>
<th></th>
</tr>
</thead>
<tbody>
<tr v-for="run in runs" :key="run.run_id">
<td>{{ run.timestamp }}</td>
<td>{{ run.candidate_count }}</td>
<td>{{ run.already_imported ? '✓' : '—' }}</td>
<td>
<!-- Import is disabled for runs already imported and for the run currently being imported -->
<button
class="btn-import"
:disabled="run.already_imported || importingRunId === run.run_id"
@click="importRun(run.run_id)"
>
{{ importingRunId === run.run_id ? 'Importing…' : 'Import' }}
</button>
</td>
</tr>
</tbody>
</table>
<!-- Summary of the last import; hidden while importResult is null -->
<div v-if="importResult" class="import-result">
Imported {{ importResult.imported }}, skipped {{ importResult.skipped }}.
</div>
</section>
<!-- Save / Reload -->
<div class="save-bar">
<button class="btn-primary" :disabled="saving" @click="save">
@ -142,6 +199,64 @@ const saveOk = ref(true)
// UI preferences initialised from localStorage: each is true unless the stored
// value is the string 'false' (so a missing key also yields true).
const richMotion = ref(localStorage.getItem('cf-avocet-rich-motion') !== 'false')
const keyHints = ref(localStorage.getItem('cf-avocet-key-hints') !== 'false')
// SFT integration state
// Text of the bench_results_dir input; filled by loadSftConfig and sent by saveSftConfig.
const benchResultsDir = ref('')
// Runs returned by GET /api/sft/runs, shown in the runs table.
const runs = ref<Array<{ run_id: string; timestamp: string; candidate_count: number; already_imported: boolean }>>([])
// run_id of the import currently in flight, or null when none is running.
const importingRunId = ref<string | null>(null)
// Counts returned by the last successful import, or null when there is nothing to show.
const importResult = ref<{ imported: number; skipped: number } | null>(null)
// Short status message displayed next to the Save button; empty string hides it.
const saveStatus = ref('')
/**
 * Populate `benchResultsDir` from GET /api/sft/config.
 *
 * A missing or null `bench_results_dir` becomes the empty string. Non-OK
 * responses and thrown errors are ignored, leaving the field as it was.
 */
async function loadSftConfig() {
  try {
    const response = await fetch('/api/sft/config')
    if (!response.ok) return
    const config = await response.json()
    benchResultsDir.value = config.bench_results_dir ?? ''
  } catch {
    // Non-fatal — leave the field empty.
  }
}
// Handle of the pending "clear the save status" timer, if one is scheduled.
let saveStatusClearTimer: ReturnType<typeof setTimeout> | undefined

/**
 * Persist the bench_results_dir setting via POST /api/sft/config.
 *
 * Shows 'Saving…' while the request is in flight, then 'Saved.' or
 * 'Error saving.' depending on the outcome, and clears the message 2 seconds
 * after the request settles. Any previously scheduled clear is cancelled
 * first, so a timer left over from an earlier save cannot blank the status of
 * a newer one ahead of time.
 */
async function saveSftConfig() {
  clearTimeout(saveStatusClearTimer)
  saveStatus.value = 'Saving…'
  try {
    const res = await fetch('/api/sft/config', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ bench_results_dir: benchResultsDir.value }),
    })
    saveStatus.value = res.ok ? 'Saved.' : 'Error saving.'
  } catch {
    saveStatus.value = 'Error saving.'
  }
  // An overlapping save may have scheduled its own clear while this request
  // was pending; replace it so the message stays visible for the full delay.
  clearTimeout(saveStatusClearTimer)
  saveStatusClearTimer = setTimeout(() => { saveStatus.value = '' }, 2000)
}
/**
 * Refresh `runs` from GET /api/sft/runs.
 *
 * Non-OK responses and thrown errors are ignored, so the previous list is
 * kept in those cases.
 */
async function scanRuns() {
  try {
    const response = await fetch('/api/sft/runs')
    if (!response.ok) return
    runs.value = await response.json()
  } catch {
    /* ignore */
  }
}
/**
 * Import the run identified by `runId` via POST /api/sft/import.
 *
 * Marks the run as importing and clears the previous result before the
 * request. On an OK response the returned body is stored in `importResult`
 * and the run list is re-scanned (not awaited). Failures are ignored. The
 * importing marker is always reset afterwards.
 */
async function importRun(runId: string) {
  importingRunId.value = runId
  importResult.value = null
  try {
    const response = await fetch('/api/sft/import', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ run_id: runId }),
    })
    if (!response.ok) return
    importResult.value = await response.json()
    scanRuns() // refresh already_imported flags
  } catch {
    /* ignore */
  } finally {
    importingRunId.value = null
  }
}
async function reload() {
const { data } = await useApiFetch<{ accounts: Account[]; max_per_account: number }>('/api/config')
if (data) {
@ -219,7 +334,10 @@ function onKeyHintsChange() {
document.documentElement.classList.toggle('hide-key-hints', !keyHints.value)
}
onMounted(reload)
// On mount, load the main settings and the saved SFT config.
// Both calls are fire-and-forget; neither result is awaited here.
onMounted(() => {
  void reload()
  void loadSftConfig()
})
</script>
<style scoped>
@ -428,4 +546,61 @@ onMounted(reload)
border: 1px dashed var(--color-border, #d0d7e8);
border-radius: 0.5rem;
}
/* Descriptive paragraph under a section title. */
.section-desc {
color: var(--color-text-secondary, #6b7a99);
font-size: 0.88rem;
line-height: 1.5;
}
/* Full-width bordered text input.
   NOTE(review): the bench_results_dir <input> in the visible template carries no
   class — confirm this rule is actually applied somewhere. */
.field-input {
padding: 0.4rem 0.6rem;
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
background: var(--color-surface, #fff);
color: var(--color-text, #1a2338);
font-size: 0.9rem;
font-family: var(--font-body, sans-serif);
width: 100%;
}
/* Table of scanned runs: full width with collapsed borders. */
.runs-table {
width: 100%;
border-collapse: collapse;
margin-top: var(--space-3, 0.75rem);
font-size: 0.88rem;
}
/* Left-aligned cells, each with a bottom rule. */
.runs-table th,
.runs-table td {
padding: var(--space-2, 0.5rem) var(--space-3, 0.75rem);
text-align: left;
border-bottom: 1px solid var(--color-border, #d0d7e8);
}
/* Outline-style per-run import button (transparent background, primary-coloured border and text). */
.btn-import {
padding: var(--space-1, 0.25rem) var(--space-3, 0.75rem);
border: 1px solid var(--app-primary, #2A6080);
border-radius: var(--radius-sm, 0.25rem);
background: none;
color: var(--app-primary, #2A6080);
cursor: pointer;
font-size: 0.85rem;
}
/* Disabled state: dimmed, with a not-allowed cursor. */
.btn-import:disabled {
opacity: 0.45;
cursor: not-allowed;
}
/* Import summary line shown under the table. */
.import-result {
margin-top: var(--space-2, 0.5rem);
font-size: 0.88rem;
color: var(--color-text-secondary, #6b7a99);
}
/* Status text shown next to the Save button. */
.save-status {
font-size: 0.85rem;
color: var(--color-text-secondary, #6b7a99);
}
</style>