feat: doc upload adapter — writes facts, document, and chunks to context store
This commit is contained in:
parent
b23a60a602
commit
ebbb1af32d
2 changed files with 107 additions and 0 deletions
43
app/ingest/doc_upload.py
Normal file
43
app/ingest/doc_upload.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
"""Upload adapter: processes file bytes and writes to context store — MIT licensed."""
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from app.context.chunker import process_upload
|
||||
from app.context.store import add_document, add_fact
|
||||
|
||||
|
||||
def ingest_upload(db_path: Path, filename: str, content: bytes) -> dict[str, Any]:
    """Process an uploaded file and write the results to the context store.

    The upload is parsed first via ``process_upload``; anything it raises
    propagates unchanged, and nothing is written in that case because parsing
    happens before any store call.

    Args:
        db_path: Path of the SQLite database holding the context tables.
        filename: Name of the uploaded file; handed to ``process_upload`` and
            stored on the document row.
        content: Raw bytes of the upload.

    Returns:
        A summary dict with the keys ``document_id``, ``doc_type``,
        ``facts_written`` and ``chunks_written``.
    """
    doc_type, facts, chunks = process_upload(filename, content)

    doc = add_document(
        db_path,
        filename=filename,
        doc_type=doc_type,
        # Undecodable bytes become U+FFFD so storing the text never fails.
        full_text=content.decode("utf-8", errors="replace"),
        file_size=len(content),
    )

    for fact in facts:
        add_fact(db_path, fact.category, fact.key, fact.value, source="upload")

    # Build every row up front so the whole batch goes in with one call.
    chunk_rows = [
        (str(uuid.uuid4()), doc.id, index, chunk_text)
        for index, chunk_text in enumerate(chunks)
    ]

    # NOTE(review): the document and facts go through the store helpers before
    # this connection is opened, so a failure below presumably does not undo
    # them -- confirm whether that is acceptable.
    conn = sqlite3.connect(str(db_path))
    try:
        # Issued before the insert transaction starts.
        conn.execute("PRAGMA journal_mode=WAL")
        # ``with conn`` commits on success and rolls back on error, so a
        # failed batch never leaves a partial set of chunks behind.
        with conn:
            conn.executemany(
                "INSERT INTO context_chunks(id, document_id, chunk_index, text) VALUES (?,?,?,?)",
                chunk_rows,
            )
    finally:
        # Always release the handle, including when an insert fails.
        conn.close()

    return {
        "document_id": doc.id,
        "doc_type": doc_type,
        "facts_written": len(facts),
        "chunks_written": len(chunks),
    }
|
||||
64
tests/context/test_doc_upload.py
Normal file
64
tests/context/test_doc_upload.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
"""End-to-end upload pipeline: file bytes → DB rows."""
|
||||
import sqlite3
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
|
||||
from app.ingest.doc_upload import ingest_upload
|
||||
from app.context.store import list_facts, list_documents
|
||||
from app.context.chunker import UnsupportedDocType
|
||||
|
||||
|
||||
@pytest.fixture
def db(tmp_path):
    """Create a throwaway SQLite file with the three context tables.

    Returns the path of the database (``tmp_path / "t.db"``), not an open
    connection.
    """
    schema = """
        CREATE TABLE context_facts (
            id TEXT PRIMARY KEY, category TEXT NOT NULL, key TEXT NOT NULL,
            value TEXT NOT NULL, source TEXT, created_at TEXT NOT NULL
        );
        CREATE TABLE context_documents (
            id TEXT PRIMARY KEY, filename TEXT NOT NULL, doc_type TEXT NOT NULL,
            full_text TEXT NOT NULL, file_size INTEGER, uploaded_at TEXT NOT NULL
        );
        CREATE TABLE context_chunks (
            id TEXT PRIMARY KEY, document_id TEXT NOT NULL
                REFERENCES context_documents(id) ON DELETE CASCADE,
            chunk_index INTEGER NOT NULL, text TEXT NOT NULL, embedding BLOB
        );
    """
    database_file = tmp_path / "t.db"

    connection = sqlite3.connect(str(database_file))
    connection.executescript(schema)
    connection.commit()
    connection.close()

    return database_file
|
||||
|
||||
|
||||
def test_ingest_yaml_creates_facts_and_doc(db):
    """A compose-style YAML upload yields one document, chunks and a plex fact."""
    # The nesting below is significant: ``plex`` must sit under ``services``.
    yaml_bytes = b"""
services:
  plex:
    image: plexinc/pms-docker
    ports:
      - "32400:32400"
"""
    summary = ingest_upload(db, "docker-compose.yml", yaml_bytes)

    # Summary returned by the adapter.
    assert summary["doc_type"] == "yaml"
    assert summary["facts_written"] >= 1
    assert summary["chunks_written"] >= 1

    # Exactly one document row, carrying the uploaded filename.
    stored_docs = list_documents(db)
    assert len(stored_docs) == 1
    assert stored_docs[0].filename == "docker-compose.yml"

    # The service named in the YAML is present among the "service" facts.
    service_keys = [fact.key for fact in list_facts(db, category="service")]
    assert "plex" in service_keys
|
||||
|
||||
|
||||
def test_ingest_markdown_no_facts(db):
    """A Markdown upload is chunked but produces no facts."""
    runbook = b"# Runbook\n\nRestart plex with `systemctl restart plex`."

    summary = ingest_upload(db, "runbook.md", runbook)

    assert summary["doc_type"] == "markdown"
    assert summary["facts_written"] == 0
    assert summary["chunks_written"] >= 1
|
||||
|
||||
|
||||
def test_ingest_raises_on_bad_type(db):
    """Uploading a ``.pdf`` file is rejected with ``UnsupportedDocType``."""
    unsupported_name = "report.pdf"
    payload = b"data"

    with pytest.raises(UnsupportedDocType):
        ingest_upload(db, unsupported_name, payload)
|
||||
Loading…
Reference in a new issue