turnstone/web/src/views/SourcesView.vue
pyr0ball 41fc89c474 feat: SSH remote glean — transport layer, pipeline integration, REST + UI (#22)
Closes turnstone#22.

## Transport layer (app/glean/ssh.py)
- SSHTransport context manager: key-only auth, paramiko backend
- SSHConnectionError / SSHCommandError exception hierarchy
- exec_stream() generator: yields stdout lines, raises SSHCommandError on
  non-zero exit (isinstance(int) guard for test-mock safety)
- Command builders: _build_journald_command, _build_syslog_command,
  _build_plaintext_command, _build_docker_command
- 18 unit tests in tests/test_glean_ssh.py

## Pipeline integration (app/glean/pipeline.py)
- _stream_and_write(): per-item error isolation — SSHCommandError skips
  one glean item without aborting the rest of the host connection
- _glean_ssh_source(): one SSHTransport per host, dispatches all glean
  items (journald/syslog/plaintext/docker); SSHConnectionError aborts host
- glean_sources(): splits local vs SSH sources; local → _glean_files();
  SSH → _glean_ssh_source(); shared compiled patterns and DB connection
- glean_ssh_source(): public wrapper for REST use — manages DB connection,
  pattern compilation, FTS rebuild lifecycle
- 15 integration tests in tests/test_glean_pipeline_ssh.py
- All 285 tests passing

## REST layer (app/rest.py)
- GET /api/sources/configured: reads sources.yaml and enriches with DB
  stats; SSH sources appear before first glean (entry_count=0); sub-source
  IDs (rack01/journald, rack01/docker/myapp) aggregated per host entry
- POST /api/sources/{id}/glean: detects transport:ssh and dispatches to
  glean_ssh_source() wrapper; local sources unchanged
- Import: glean_ssh_source as _glean_ssh_source

## Frontend (web/src/views/SourcesView.vue)
- Fetches /api/sources/configured (primary) + /api/sources (DB-only) in
  parallel; merges into unified SourceRow list
- SSH sources show: ssh badge (with user@host tooltip), glean-type pills
  (journald/syslog/docker/etc.), host subtitle
- SSH sub-source IDs (rack01/journald) suppressed from the DB-only list
  since they are covered by the parent SSH row
- DB-only sources (uploads) appear below configured sources with 'uploaded'
  badge; reglean button disabled (not in sources.yaml)
- Delete zeroes out configured-source stats in-place rather than removing
  the row (so the source remains visible for re-gleaning)
2026-05-21 12:37:30 -07:00

300 lines
11 KiB
Vue

<template>
<div class="p-4 sm:p-6 max-w-5xl mx-auto">
<!-- Page layout: heading row with the upload control, an action feedback banner,
     then exactly one of: loading notice, empty-state hint, or the sources table. -->
<div class="mb-6 flex items-start justify-between gap-4">
<div>
<h1 class="text-text-primary text-xl font-semibold mb-1">Log Sources</h1>
<p class="text-text-dim text-sm">All hosts and services in the gleaned corpus.</p>
</div>
<!-- Upload control: the native file input is hidden and activated through its
     wrapping label; a selection is handled by handleUpload. -->
<label class="btn-secondary text-sm cursor-pointer shrink-0">
<span>Upload log file</span>
<input type="file" class="hidden" @change="handleUpload" />
</label>
</div>
<!-- Upload / action feedback: rendered only while actionMsg is non-empty;
     actionError switches between the error and accent colour schemes. -->
<div v-if="actionMsg" class="mb-4 text-sm rounded border px-4 py-2.5"
:class="actionError ? 'border-sev-error text-sev-error bg-surface-raised' : 'border-accent text-accent bg-surface-raised'">
{{ actionMsg }}
</div>
<!-- Mutually exclusive states: loading, no rows, or the populated table. -->
<div v-if="loading" class="text-text-dim py-8 text-center text-sm">Loading</div>
<div v-else-if="sources.length === 0" class="text-text-dim py-12 text-center">
<p class="mb-1">No log sources found.</p>
<p class="text-sm">Run the glean pipeline: <code class="bg-surface-raised px-1 rounded">python scripts/glean_corpus.py</code></p>
</div>
<div v-else class="rounded border border-surface-border overflow-hidden">
<div class="overflow-x-auto">
<table class="w-full text-sm min-w-[620px]">
<thead class="bg-surface-raised border-b border-surface-border">
<tr>
<th class="text-left px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider">Source</th>
<th class="text-right px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider">Entries</th>
<th class="text-right px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider">Errors</th>
<th class="text-left px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider">Earliest</th>
<th class="text-left px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider">Latest</th>
<!-- Untitled column that holds the per-row action buttons. -->
<th class="px-4 py-2.5"></th>
</tr>
</thead>
<tbody>
<!-- One row per entry of the sources list, keyed by the source id. -->
<tr
v-for="src in sources"
:key="src.id"
class="border-b border-surface-border hover:bg-surface-raised transition-colors"
>
<!-- Source name + badges -->
<td class="px-4 py-2.5">
<div class="flex flex-wrap items-center gap-1.5">
<span class="text-accent font-mono text-xs">{{ src.id }}</span>
<!-- SSH transport badge (tooltip shows user@host) -->
<span
v-if="src.transport === 'ssh'"
class="inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-[10px] font-medium
bg-blue-900/30 text-blue-400 border border-blue-800/40"
:title="`SSH: ${src.user}@${src.host}`"
>
<svg class="w-2.5 h-2.5" viewBox="0 0 16 16" fill="currentColor" aria-hidden="true">
<path d="M2 3a1 1 0 011-1h10a1 1 0 011 1v2a1 1 0 01-1 1H3a1 1 0 01-1-1V3zm0 5a1 1 0 011-1h4a1 1 0 110 2H3a1 1 0 01-1-1zm0 4a1 1 0 011-1h2a1 1 0 110 2H3a1 1 0 01-1-1z"/>
</svg>
ssh
</span>
<!-- Glean-type pills for SSH sources; a missing glean_types list renders no pills -->
<span
v-for="gtype in (src.glean_types ?? [])"
:key="gtype"
class="px-1.5 py-0.5 rounded text-[10px] font-medium
bg-surface-raised text-text-dim border border-surface-border"
>{{ gtype }}</span>
<!-- Upload badge for DB-only sources not in sources.yaml -->
<span
v-if="src.dbOnly"
class="px-1.5 py-0.5 rounded text-[10px] font-medium
bg-surface-raised text-text-dim border border-surface-border"
>uploaded</span>
</div>
<!-- SSH host subtitle -->
<div v-if="src.transport === 'ssh'" class="text-text-dim text-xs mt-0.5 font-mono">
{{ src.user }}@{{ src.host }}
</div>
</td>
<!-- Entry count -->
<td class="px-4 py-2.5 text-text-muted text-right tabular-nums">
{{ src.entry_count.toLocaleString() }}
</td>
<!-- Error count: highlighted with the error colour when greater than zero -->
<td class="px-4 py-2.5 text-right tabular-nums">
<span :class="src.error_count > 0 ? 'text-sev-error' : 'text-text-dim'">
{{ src.error_count.toLocaleString() }}
</span>
</td>
<!-- Earliest / latest timestamps, rendered through formatTs -->
<td class="px-4 py-2.5 text-text-dim text-xs">{{ formatTs(src.earliest) }}</td>
<td class="px-4 py-2.5 text-text-dim text-xs">{{ formatTs(src.latest) }}</td>
<!-- Actions -->
<td class="px-4 py-2.5">
<div class="flex items-center justify-end gap-2">
<!-- Re-glean: disabled for DB-only rows and while this row's id is in the busy set -->
<button
:disabled="busy.has(src.id) || src.dbOnly"
@click="reglean(src.id)"
class="text-text-dim hover:text-accent transition-colors text-xs px-2 py-1 rounded hover:bg-surface disabled:opacity-40"
:title="src.dbOnly ? 'Not in sources.yaml — cannot re-glean' : 'Re-glean from sources.yaml'"
>{{ busy.has(src.id) ? '…' : 'reglean' }}</button>
<!-- Delete: disabled only while this row's id is in the busy set -->
<button
:disabled="busy.has(src.id)"
@click="deleteSource(src.id)"
class="text-text-dim hover:text-sev-error transition-colors text-xs px-2 py-1 rounded hover:bg-surface disabled:opacity-40"
title="Delete all entries for this source"
>delete</button>
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</template>
<script setup lang="ts">
import { ref, onMounted } from 'vue'
/** DB statistics carried by every source record (always present, default 0/null). */
interface SourceStats {
  entry_count: number
  error_count: number
  earliest: string | null
  latest: string | null
}

/**
 * Unified row shown in the table; the list is built by merging configured
 * sources with DB-only sources.
 */
interface SourceRow extends SourceStats {
  id: string
  transport: 'local' | 'ssh'
  /** SSH-specific fields. */
  host?: string
  user?: string
  glean_types?: string[]
  /** Local-specific field. */
  path?: string
  /** True when the source exists in the DB but not in sources.yaml (e.g. uploads). */
  dbOnly?: boolean
}

/** Element of the configured-sources response: a row without the `dbOnly` marker. */
type ConfiguredSource = Omit<SourceRow, 'dbOnly'>

/** Element of the DB sources response: stats identified by `source_id`. */
interface DbSource extends SourceStats {
  source_id: string
}
// Rows rendered in the table: configured sources followed by DB-only ones.
const sources = ref<SourceRow[]>([])
// Starts true; loadSources() clears it once a load attempt has finished.
const loading = ref(true)
// IDs of sources with a delete / re-glean request in flight.
// setBusy() replaces this Set with a fresh copy instead of mutating it.
const busy = ref(new Set<string>())
// Text of the feedback banner; the banner is hidden while this is empty.
const actionMsg = ref('')
// When true the feedback banner uses the error colour scheme.
const actionError = ref(false)
// Application base URL with one trailing slash removed; prefix for every API request.
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
/**
 * Fetch the configured sources and the DB sources in parallel and merge them
 * into `sources`.
 *
 * Configured sources (from sources.yaml, enriched with DB stats) come first so
 * SSH sources are visible even before their first glean. DB sources that are
 * not covered by a configured entry are appended and flagged `dbOnly`.
 *
 * Failures never reject: a network or parse error, or a non-OK response from
 * either endpoint, is reported through the feedback banner. A non-OK response
 * still contributes an empty list so whatever did load is displayed.
 */
async function loadSources(): Promise<void> {
  try {
    const [configuredRes, dbRes] = await Promise.all([
      fetch(`${BASE}/api/sources/configured`),
      fetch(`${BASE}/api/sources`),
    ])
    const configuredData = configuredRes.ok ? await configuredRes.json() : { sources: [] }
    const dbData = dbRes.ok ? await dbRes.json() : { sources: [] }
    const configuredSources: ConfiguredSource[] = configuredData.sources ?? []
    const dbSources: DbSource[] = dbData.sources ?? []

    // A DB source is covered when its id matches a configured source exactly,
    // or when it is a sub-source ("rack01/journald") of a configured SSH host.
    const configuredIds = new Set(configuredSources.map(s => s.id))
    const sshPrefixes = configuredSources
      .filter(s => s.transport === 'ssh')
      .map(s => `${s.id}/`)
    const isCovered = (sourceId: string): boolean =>
      configuredIds.has(sourceId) || sshPrefixes.some(prefix => sourceId.startsWith(prefix))

    // DB-only sources: uploaded files or manually gleaned sources not in sources.yaml.
    const dbOnly: SourceRow[] = dbSources
      .filter(db => !isCovered(db.source_id))
      .map(db => ({
        id: db.source_id,
        transport: 'local' as const,
        entry_count: db.entry_count,
        error_count: db.error_count,
        earliest: db.earliest,
        latest: db.latest,
        dbOnly: true,
      }))
    sources.value = [...configuredSources, ...dbOnly]

    // Surface partial failures instead of silently showing an incomplete list
    // (without the configured list every row would look like an upload).
    const failed = [configuredRes, dbRes].find(res => !res.ok)
    if (failed) {
      actionMsg.value = `Failed to load sources (HTTP ${failed.status})`
      actionError.value = true
    }
  } catch (err: unknown) {
    // Network failure or malformed JSON: keep the rows already shown and report it.
    const reason = err instanceof Error ? err.message : String(err)
    actionMsg.value = `Failed to load sources: ${reason}`
    actionError.value = true
  } finally {
    loading.value = false
  }
}
onMounted(loadSources)
/**
 * Add (`on === true`) or remove (`on === false`) a source id from the busy set.
 * The current Set is copied and the copy is assigned back to `busy`.
 */
function setBusy(id: string, on: boolean): void {
  const updated = new Set(busy.value)
  if (on) {
    updated.add(id)
  } else {
    updated.delete(id)
  }
  busy.value = updated
}
/**
 * Delete every stored entry for a source after the user confirms.
 *
 * On success a DB-only row is removed from the table, while a configured
 * source keeps its row with zeroed stats so it can still be re-gleaned.
 * Any failure (HTTP error, network error, unreadable body) is shown in the
 * feedback banner; the function itself never rejects.
 *
 * @param sourceId - id of the source whose entries are deleted
 */
async function deleteSource(sourceId: string): Promise<void> {
  if (!confirm(`Delete all entries for "${sourceId}"? This cannot be undone.`)) return
  setBusy(sourceId, true)
  actionMsg.value = ''
  actionError.value = false
  try {
    const res = await fetch(`${BASE}/api/sources/${encodeURIComponent(sourceId)}`, { method: 'DELETE' })
    // The body is not guaranteed to be JSON (e.g. an error page from a proxy).
    const data = await res.json().catch(() => null)
    if (!res.ok) {
      actionMsg.value = data?.detail ?? 'Delete failed'
      actionError.value = true
      return
    }
    // Tolerate a response that omits the count rather than throwing on it.
    const deleted = Number(data?.deleted ?? 0)
    actionMsg.value = `Deleted ${deleted.toLocaleString()} entries for "${sourceId}"`
    // Remove DB-only rows; zero-out configured-source stats instead of hiding.
    sources.value = sources.value
      .filter(s => !(s.id === sourceId && s.dbOnly))
      .map(s => s.id === sourceId
        ? { ...s, entry_count: 0, error_count: 0, earliest: null, latest: null }
        : s
      )
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)
    actionMsg.value = `Delete failed: ${reason}`
    actionError.value = true
  } finally {
    setBusy(sourceId, false)
  }
}
/**
 * Trigger a server-side re-glean of one source and refresh the table.
 *
 * The row is marked busy for the duration of the request. The outcome (new
 * entry count or failure reason) is shown in the feedback banner; the
 * function itself never rejects.
 *
 * @param sourceId - id of the source to re-glean
 */
async function reglean(sourceId: string): Promise<void> {
  setBusy(sourceId, true)
  actionMsg.value = ''
  actionError.value = false
  // Distinguishes "glean failed" from "glean worked but the refresh failed".
  let gleanSucceeded = false
  try {
    const res = await fetch(`${BASE}/api/sources/${encodeURIComponent(sourceId)}/glean`, { method: 'POST' })
    // The body is not guaranteed to be JSON (e.g. an error page from a proxy),
    // so a parse failure must not mask the HTTP status handling below.
    const data = await res.json().catch(() => null)
    if (!res.ok) {
      actionMsg.value = data?.detail ?? 'Re-glean failed'
      actionError.value = true
      return
    }
    const gleaned = Number(data?.gleaned ?? 0)
    actionMsg.value = `Re-glean complete: ${gleaned.toLocaleString()} new entries for "${sourceId}"`
    actionError.value = false
    gleanSucceeded = true
    await loadSources()
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)
    actionMsg.value = gleanSucceeded
      ? `Re-glean complete, but refreshing the source list failed: ${reason}`
      : `Re-glean failed: ${reason}`
    actionError.value = true
  } finally {
    setBusy(sourceId, false)
  }
}
/**
 * Upload the file chosen in the file input and refresh the table.
 *
 * Progress and outcome are shown in the feedback banner. The input is always
 * cleared afterwards, including on failure, so selecting the same file again
 * fires another `change` event. The function itself never rejects.
 *
 * @param e - `change` event from the file input
 */
async function handleUpload(e: Event): Promise<void> {
  const input = e.target as HTMLInputElement
  const file = input.files?.[0]
  if (!file) return
  actionMsg.value = 'Uploading…'
  actionError.value = false
  // Distinguishes "upload failed" from "upload worked but the refresh failed".
  let uploadSucceeded = false
  try {
    const form = new FormData()
    form.append('file', file)
    const res = await fetch(`${BASE}/api/glean/upload`, { method: 'POST', body: form })
    // The body is not guaranteed to be JSON (e.g. an error page from a proxy).
    const data = await res.json().catch(() => null)
    if (!res.ok) {
      actionMsg.value = data?.detail ?? 'Upload failed'
      actionError.value = true
      return
    }
    const gleaned = Number(data?.gleaned ?? 0)
    // Display-only fallback to the file name if the response omits source_id.
    const sourceId = data?.source_id ?? file.name
    actionMsg.value = `Uploaded: ${gleaned.toLocaleString()} entries gleaned as "${sourceId}"`
    actionError.value = false
    uploadSucceeded = true
    await loadSources()
  } catch (err: unknown) {
    // Without this the banner would stay on "Uploading…" after a network error.
    const reason = err instanceof Error ? err.message : String(err)
    actionMsg.value = uploadSucceeded
      ? `Upload complete, but refreshing the source list failed: ${reason}`
      : `Upload failed: ${reason}`
    actionError.value = true
  } finally {
    input.value = ''
  }
}
/**
 * Format a timestamp string for display in the table.
 *
 * @param iso - timestamp string, or null when the source has no entries
 * @returns an em dash for null/empty input, the input unchanged when it
 *   cannot be parsed as a date, otherwise a short date-and-time string in the
 *   runtime's default locale
 */
function formatTs(iso: string | null): string {
  if (!iso) return '—'
  const parsed = new Date(iso)
  // toLocaleString() does not throw on an unparseable date; it returns the
  // literal text "Invalid Date". Check validity explicitly so the raw value
  // is shown instead.
  if (Number.isNaN(parsed.getTime())) return iso
  return parsed.toLocaleString(undefined, {
    year: 'numeric', month: 'short', day: 'numeric',
    hour: '2-digit', minute: '2-digit',
  })
}
</script>