turnstone/tests/context/test_doc_upload.py

64 lines
2 KiB
Python

"""End-to-end upload pipeline: file bytes → DB rows."""
import sqlite3
import pytest
from pathlib import Path
from app.ingest.doc_upload import ingest_upload
from app.context.store import list_facts, list_documents
from app.context.chunker import UnsupportedDocType
@pytest.fixture
def db(tmp_path: Path) -> Path:
    """Create a throwaway SQLite database holding the context-store schema.

    Builds ``t.db`` inside pytest's ``tmp_path`` with the ``context_facts``,
    ``context_documents`` and ``context_chunks`` tables. The connection used
    for setup is closed before returning; tests receive the file path, which
    they pass on to the code under test.

    Args:
        tmp_path: Per-test temporary directory supplied by pytest.

    Returns:
        Path to the initialised database file.
    """
    db_path = tmp_path / "t.db"
    conn = sqlite3.connect(str(db_path))
    try:
        conn.executescript("""
            CREATE TABLE context_facts (
                id TEXT PRIMARY KEY, category TEXT NOT NULL, key TEXT NOT NULL,
                value TEXT NOT NULL, source TEXT, created_at TEXT NOT NULL
            );
            CREATE TABLE context_documents (
                id TEXT PRIMARY KEY, filename TEXT NOT NULL, doc_type TEXT NOT NULL,
                full_text TEXT NOT NULL, file_size INTEGER, uploaded_at TEXT NOT NULL
            );
            CREATE TABLE context_chunks (
                id TEXT PRIMARY KEY, document_id TEXT NOT NULL
                    REFERENCES context_documents(id) ON DELETE CASCADE,
                chunk_index INTEGER NOT NULL, text TEXT NOT NULL, embedding BLOB
            );
        """)
        conn.commit()
    finally:
        # Close the handle even when schema creation raises, so a failing
        # fixture does not leak an open connection to the temp database.
        conn.close()
    return db_path
def test_ingest_yaml_creates_facts_and_doc(db):
    """Ingesting a compose file records facts, chunks and one document row.

    Uploads a minimal ``docker-compose.yml`` and checks the summary returned
    by ``ingest_upload`` as well as what ``list_documents`` / ``list_facts``
    read back from the database afterwards.
    """
    # YAML nesting is significant: ``plex`` must be a child of ``services``
    # (and ``image``/``ports`` children of ``plex``) for the payload to
    # describe a service at all. With the indentation flattened, every key
    # becomes a top-level sibling and ``services`` is empty.
    # NOTE(review): presumably the "service" facts are derived from the
    # ``services`` mapping -- confirm against the ingest implementation.
    yaml_bytes = (
        b"\n"
        b"services:\n"
        b"  plex:\n"
        b"    image: plexinc/pms-docker\n"
        b"    ports:\n"
        b'      - "32400:32400"\n'
    )

    result = ingest_upload(db, "docker-compose.yml", yaml_bytes)

    # Summary reported by the ingest call itself.
    assert result["doc_type"] == "yaml"
    assert result["facts_written"] >= 1
    assert result["chunks_written"] >= 1

    # Exactly one document row, carrying the uploaded filename.
    docs = list_documents(db)
    assert len(docs) == 1
    assert docs[0].filename == "docker-compose.yml"

    # At least one "service" fact is keyed by the compose service name.
    facts = list_facts(db, category="service")
    assert any(f.key == "plex" for f in facts)
def test_ingest_markdown_no_facts(db):
    """A Markdown upload is chunked but contributes no facts."""
    runbook_bytes = b"# Runbook\n\nRestart plex with `systemctl restart plex`."

    summary = ingest_upload(db, "runbook.md", runbook_bytes)

    # The ingest summary should report the markdown type, zero facts and
    # at least one chunk.
    assert summary["doc_type"] == "markdown"
    assert summary["facts_written"] == 0
    assert summary["chunks_written"] >= 1
def test_ingest_raises_on_bad_type(db):
    """Uploading ``report.pdf`` must raise ``UnsupportedDocType``."""
    filename = "report.pdf"
    payload = b"data"

    with pytest.raises(UnsupportedDocType):
        ingest_upload(db, filename, payload)