From bae889ddf219a49e7c43c414b260ff862b7e4f07 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 13 May 2026 15:51:19 -0700 Subject: [PATCH] feat: add context_facts, context_documents, context_chunks tables to schema --- app/ingest/pipeline.py | 29 +++++++++++++++++++++++++++++ tests/context/__init__.py | 0 tests/context/test_schema.py | 24 ++++++++++++++++++++++++ 3 files changed, 53 insertions(+) create mode 100644 tests/context/__init__.py create mode 100644 tests/context/test_schema.py diff --git a/app/ingest/pipeline.py b/app/ingest/pipeline.py index bddaf43..cf1a16a 100644 --- a/app/ingest/pipeline.py +++ b/app/ingest/pipeline.py @@ -62,6 +62,35 @@ CREATE TABLE IF NOT EXISTS received_bundles ( ); CREATE INDEX IF NOT EXISTS idx_bundles_bundled ON received_bundles(bundled_at); CREATE INDEX IF NOT EXISTS idx_bundles_type ON received_bundles(issue_type); + +CREATE TABLE IF NOT EXISTS context_facts ( + id TEXT PRIMARY KEY, + category TEXT NOT NULL, + key TEXT NOT NULL, + value TEXT NOT NULL, + source TEXT, + created_at TEXT NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_facts_category ON context_facts(category); +CREATE INDEX IF NOT EXISTS idx_facts_key ON context_facts(key); + +CREATE TABLE IF NOT EXISTS context_documents ( + id TEXT PRIMARY KEY, + filename TEXT NOT NULL, + doc_type TEXT NOT NULL, + full_text TEXT NOT NULL, + file_size INTEGER, + uploaded_at TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS context_chunks ( + id TEXT PRIMARY KEY, + document_id TEXT NOT NULL REFERENCES context_documents(id) ON DELETE CASCADE, + chunk_index INTEGER NOT NULL, + text TEXT NOT NULL, + embedding BLOB +); +CREATE INDEX IF NOT EXISTS idx_chunks_doc ON context_chunks(document_id); """ diff --git a/tests/context/__init__.py b/tests/context/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/context/test_schema.py b/tests/context/test_schema.py new file mode 100644 index 0000000..69b0327 --- /dev/null +++ b/tests/context/test_schema.py @@ -0,0 +1,24 @@ 
+"""Verify ensure_schema creates the context tables, and does so idempotently."""
+import sqlite3
+from contextlib import closing
+from pathlib import Path
+import pytest
+from app.ingest.pipeline import ensure_schema
+
+
+def test_context_tables_created(tmp_path):
+    """ensure_schema must create all three context_* tables in a fresh DB."""
+    db = tmp_path / "t.db"
+    ensure_schema(db)
+    # closing() releases the handle even if the query raises; a bare `with conn` would not.
+    with closing(sqlite3.connect(str(db))) as conn:
+        rows = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
+        tables = {name for (name,) in rows}
+    assert {"context_facts", "context_documents", "context_chunks"} - tables == set()
+
+
+def test_context_schema_idempotent(tmp_path):
+    """Calling ensure_schema twice on the same database file must not raise."""
+    db = tmp_path / "t.db"
+    ensure_schema(db)
+    ensure_schema(db)  # second call must not raise