- Add app/db/ abstraction layer: Backend enum, DbConn wrapper, dialect helper (q() for ? vs %s paramstyle), get_conn(), tenant_id() - Auto-detect backend from DATABASE_URL; SQLite remains default when unset — no config change for local deployments - Add tenant_id column to all three logical DBs (main, context, incidents); idempotent ALTER TABLE migration runs before schema scripts on existing DBs - All INSERTs inject tenant_id; SELECTs use (tenant_id = ? OR tenant_id = '') for backward compat with pre-namespacing rows - Add docker-compose.yml with named volume turnstone_pgdata (survives rebuilds) and optional external Postgres support via DATABASE_URL override - Add scripts/migrate_sqlite_to_postgres.py — one-shot idempotent migration for existing SQLite data; ON CONFLICT DO NOTHING for safe re-runs - Fix SSH glean path in pipeline.py to use ensure_schema + get_conn (was still using raw sqlite3.connect + old _SCHEMA without tenant_id) - Fix FTS5 JOIN ambiguity: qualify repeat_count as f.repeat_count in search - Update all tests to use ensure_*_schema fixtures; add row_factory where needed - 394/394 tests passing Closes: #42 Closes: #50
42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
"""Upload adapter: processes file bytes and writes to context store — MIT licensed."""
|
|
from __future__ import annotations
|
|
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from app.context.chunker import process_upload
|
|
from app.context.store import add_document, add_fact
|
|
from app.db import get_conn, resolve_tenant_id
|
|
|
|
|
|
def glean_upload(db_path: Path, filename: str, content: bytes) -> dict[str, Any]:
    """Ingest one uploaded file into the context store and summarize the result.

    The raw bytes are handed to ``process_upload``, which yields a document
    type, a collection of facts, and a collection of text chunks. The document
    is stored through ``add_document``, each fact through ``add_fact`` (tagged
    with ``source="upload"``), and each chunk is inserted as a row of
    ``context_chunks`` carrying the resolved tenant id.

    Args:
        db_path: Database location forwarded to the store helpers and
            ``get_conn``.
        filename: Name of the uploaded file; passed to ``process_upload`` and
            recorded on the stored document.
        content: Raw bytes of the upload.

    Returns:
        A dict with the keys ``document_id``, ``doc_type``, ``facts_written``
        and ``chunks_written``.
    """
    doc_type, facts, chunks = process_upload(filename, content)
    tenant = resolve_tenant_id()

    # Undecodable bytes are substituted rather than raising, so any upload
    # can be stored as text; file_size still reflects the raw byte count.
    decoded_text = content.decode("utf-8", errors="replace")
    document = add_document(
        db_path,
        filename=filename,
        doc_type=doc_type,
        full_text=decoded_text,
        file_size=len(content),
    )

    for item in facts:
        add_fact(db_path, item.category, item.key, item.value, source="upload")

    # NOTE(review): qmark-style placeholders; presumably the connection
    # returned by get_conn adapts them for non-SQLite backends — confirm.
    insert_chunk_sql = "INSERT INTO context_chunks(id, tenant_id, document_id, chunk_index, text) VALUES (?,?,?,?,?)"

    with get_conn(db_path) as conn:
        for position, body in enumerate(chunks):
            # Each chunk row gets a fresh random UUID as its primary id and
            # keeps its position within the document as chunk_index.
            row = (str(uuid.uuid4()), tenant, document.id, position, body)
            conn.execute(insert_chunk_sql, row)
        conn.commit()

    summary: dict[str, Any] = {
        "document_id": document.id,
        "doc_type": doc_type,
        "facts_written": len(facts),
        "chunks_written": len(chunks),
    }
    return summary
|