turnstone/tests/context/test_doc_upload.py
pyr0ball 0311d72e53 feat: dual-backend SQLite/Postgres + multi-tenant source namespacing
- Add app/db/ abstraction layer: Backend enum, DbConn wrapper,
  dialect helper (q() for ? vs %s paramstyle), get_conn(), tenant_id()
- Auto-detect backend from DATABASE_URL; SQLite remains default when
  unset — no config change for local deployments
- Add tenant_id column to all three logical DBs (main, context, incidents);
  idempotent ALTER TABLE migration runs before schema scripts on existing DBs
- All INSERTs inject tenant_id; SELECTs use (tenant_id = ? OR tenant_id = '')
  for backward compat with pre-namespacing rows
- Add docker-compose.yml with named volume turnstone_pgdata (survives rebuilds)
  and optional external Postgres support via DATABASE_URL override
- Add scripts/migrate_sqlite_to_postgres.py — one-shot idempotent migration
  for existing SQLite data; ON CONFLICT DO NOTHING for safe re-runs
- Fix SSH glean path in pipeline.py to use ensure_schema + get_conn
  (was still using raw sqlite3.connect + old _SCHEMA without tenant_id)
- Fix FTS5 JOIN ambiguity: qualify repeat_count as f.repeat_count in search
- Update all tests to use ensure_*_schema fixtures; add row_factory where needed
- 394/394 tests passing

Closes: #42
Closes: #50
2026-06-08 08:37:54 -07:00

47 lines
1.4 KiB
Python

"""End-to-end upload pipeline: file bytes → DB rows."""
import pytest
from pathlib import Path
from app.db.schema import ensure_context_schema
from app.glean.doc_upload import glean_upload
from app.context.store import list_facts, list_documents
from app.context.chunker import UnsupportedDocType
@pytest.fixture
def db(tmp_path):
    """Return the path to a scratch database prepared for the upload tests.

    The file is ``t.db`` inside pytest's ``tmp_path`` directory and is passed
    through ``ensure_context_schema`` before being handed to the test.
    """
    schema_target = tmp_path / "t.db"
    ensure_context_schema(schema_target)
    return schema_target
def test_ingest_yaml_creates_facts_and_doc(db):
    """Upload a docker-compose YAML file and check the rows it produces.

    Asserts that ``glean_upload`` reports the ``yaml`` doc type with at least
    one fact and one chunk written, that exactly one document named
    ``docker-compose.yml`` is listed afterwards, and that a ``service`` fact
    keyed ``plex`` exists.
    """
    # The assertions below expect `plex` to show up as a service, so it has to
    # be nested under `services` as in a real compose file. Each line is
    # spelled out with explicit indentation and newlines so the structure
    # cannot be lost if the source file's leading whitespace is ever stripped
    # (a triple-quoted literal would silently flatten into sibling keys).
    yaml_bytes = (
        b"\n"
        b"services:\n"
        b"  plex:\n"
        b"    image: plexinc/pms-docker\n"
        b"    ports:\n"
        b'      - "32400:32400"\n'
    )

    summary = glean_upload(db, "docker-compose.yml", yaml_bytes)
    assert summary["doc_type"] == "yaml"
    assert summary["facts_written"] >= 1
    assert summary["chunks_written"] >= 1

    # Exactly one document row, carrying the uploaded filename.
    documents = list_documents(db)
    assert len(documents) == 1
    assert documents[0].filename == "docker-compose.yml"

    # At least one fact in the "service" category is keyed by the service name.
    service_facts = list_facts(db, category="service")
    assert any(fact.key == "plex" for fact in service_facts)
def test_ingest_markdown_no_facts(db):
    """Upload a Markdown runbook and expect chunks to be written but no facts."""
    summary = glean_upload(
        db,
        "runbook.md",
        b"# Runbook\n\nRestart plex with `systemctl restart plex`.",
    )

    assert summary["doc_type"] == "markdown"
    assert summary["facts_written"] == 0
    assert summary["chunks_written"] >= 1
def test_ingest_raises_on_bad_type(db):
    """Uploading a ``.pdf`` file must raise ``UnsupportedDocType``."""
    rejected_name, payload = "report.pdf", b"data"
    with pytest.raises(UnsupportedDocType):
        glean_upload(db, rejected_name, payload)