Renames the app/ingest/ package to app/glean/ and updates all references
across Python modules, shell scripts, Vue components, tests, and documentation.

Intentionally preserved:
- SQLite column name ingest_time (avoids schema migration)
- RetrievedEntry.ingest_time field (maps to the column above)
- Any public-facing JSON keys that reference ingest_time

Changes by category:
- app/ingest/ → app/glean/ (full package move, all parsers)
- app/tasks/ingest_scheduler.py → app/tasks/glean_scheduler.py
- scripts/ingest_corpus.py → scripts/glean_corpus.py
- tests/test_ingest_*.py → tests/test_glean_*.py
- Docstrings, log messages, comments: ingest → glean
- Env var: TURNSTONE_INGEST_INTERVAL → TURNSTONE_GLEAN_INTERVAL
- Shell scripts: glean.log, glean_corpus.py references
- README.md: multi-source ingest → multi-source glean
- .env.example: updated env var name
- patterns/: new diagnostic patterns from 2026-05-20 SSH incident
  (service_crash_loop, pkg_daemon_restart, ssh_forward_conflict)
- SourcesView.vue: pipeline label updated
- All test import paths updated to app.glean.*

285 tests passing.
64 lines
2 KiB
Python
64 lines
2 KiB
Python
"""End-to-end upload pipeline: file bytes → DB rows."""
|
|
import sqlite3
|
|
import pytest
|
|
from pathlib import Path
|
|
|
|
from app.glean.doc_upload import glean_upload
|
|
from app.context.store import list_facts, list_documents
|
|
from app.context.chunker import UnsupportedDocType
|
|
|
|
|
|
@pytest.fixture
def db(tmp_path: Path) -> Path:
    """Create a throwaway SQLite database file and return its path.

    The file ``t.db`` is created under pytest's ``tmp_path`` and populated
    with three empty tables: ``context_facts``, ``context_documents`` and
    ``context_chunks`` (whose ``document_id`` references
    ``context_documents(id)`` with ``ON DELETE CASCADE``).

    Returns:
        The path to the database file. The connection used to build the
        schema is closed before returning, so callers open their own.
    """
    db_path = tmp_path / "t.db"
    conn = sqlite3.connect(str(db_path))
    try:
        conn.executescript("""
            CREATE TABLE context_facts (
                id TEXT PRIMARY KEY, category TEXT NOT NULL, key TEXT NOT NULL,
                value TEXT NOT NULL, source TEXT, created_at TEXT NOT NULL
            );
            CREATE TABLE context_documents (
                id TEXT PRIMARY KEY, filename TEXT NOT NULL, doc_type TEXT NOT NULL,
                full_text TEXT NOT NULL, file_size INTEGER, uploaded_at TEXT NOT NULL
            );
            CREATE TABLE context_chunks (
                id TEXT PRIMARY KEY, document_id TEXT NOT NULL
                    REFERENCES context_documents(id) ON DELETE CASCADE,
                chunk_index INTEGER NOT NULL, text TEXT NOT NULL, embedding BLOB
            );
        """)
        conn.commit()
    finally:
        # Close on every path: if schema creation raises, the handle must not
        # stay open on the temporary database file.
        conn.close()
    return db_path
|
|
|
|
|
|
def test_ingest_yaml_creates_facts_and_doc(db):
    """Uploading a compose-style YAML file records a document, chunks and facts.

    Checks that the upload is reported as ``doc_type == "yaml"`` with at least
    one fact and one chunk written, that exactly one document named
    ``docker-compose.yml`` is listed afterwards, and that the ``service``
    facts include one keyed ``plex``.
    """
    # NOTE(review): the nesting inside this literal was rebuilt following the
    # usual docker-compose layout — confirm it matches the original bytes.
    compose_bytes = b"""
services:
  plex:
    image: plexinc/pms-docker
    ports:
      - "32400:32400"
"""
    summary = glean_upload(db, "docker-compose.yml", compose_bytes)

    # What the upload call itself reports.
    assert summary["doc_type"] == "yaml"
    assert summary["facts_written"] >= 1
    assert summary["chunks_written"] >= 1

    # What is readable back from the document store.
    stored_docs = list_documents(db)
    assert len(stored_docs) == 1
    first_doc = stored_docs[0]
    assert first_doc.filename == "docker-compose.yml"

    # The compose service name should surface as a "service" fact key.
    service_keys = [fact.key for fact in list_facts(db, category="service")]
    assert "plex" in service_keys
|
|
|
|
|
|
def test_ingest_markdown_no_facts(db):
    """Uploading a Markdown runbook writes chunks but no facts.

    The upload must be reported as ``doc_type == "markdown"`` with zero facts
    written and at least one chunk written.
    """
    runbook_bytes = b"# Runbook\n\nRestart plex with `systemctl restart plex`."

    summary = glean_upload(db, "runbook.md", runbook_bytes)

    assert summary["doc_type"] == "markdown"
    assert summary["facts_written"] == 0
    assert summary["chunks_written"] >= 1
|
|
|
|
|
|
def test_ingest_raises_on_bad_type(db):
    """Uploading a ``.pdf`` file raises ``UnsupportedDocType``."""
    filename = "report.pdf"
    payload = b"data"

    with pytest.raises(UnsupportedDocType):
        glean_upload(db, filename, payload)
|