# turnstone/app/glean/doc_upload.py

"""Upload adapter: processes file bytes and writes to context store — MIT licensed."""
from __future__ import annotations

import uuid
from pathlib import Path
from typing import Any

from app.context.chunker import process_upload
from app.context.store import add_document, add_fact
from app.db import get_conn, resolve_tenant_id


def glean_upload(db_path: Path, filename: str, content: bytes) -> dict[str, Any]:
    """Ingest one uploaded file into the context store and summarise the writes.

    The raw bytes are passed to ``process_upload``, which yields a document
    type, a collection of facts and a collection of text chunks. The document,
    each fact, and each chunk are then persisted in that order.

    Args:
        db_path: Database location forwarded to the store helpers and
            ``get_conn``.
        filename: Name of the uploaded file; stored on the document record and
            given to ``process_upload``.
        content: Raw file bytes. The stored full text is these bytes decoded as
            UTF-8, with undecodable bytes replaced rather than raising.

    Returns:
        A dict with the keys ``document_id``, ``doc_type``, ``facts_written``
        and ``chunks_written``.
    """
    kind, extracted_facts, pieces = process_upload(filename, content)
    tenant = resolve_tenant_id()

    # Lossy decode: invalid UTF-8 sequences become U+FFFD instead of failing.
    decoded_text = content.decode("utf-8", errors="replace")
    record = add_document(
        db_path,
        filename=filename,
        doc_type=kind,
        full_text=decoded_text,
        file_size=len(content),
    )

    # Every extracted fact is tagged with the fixed source label "upload".
    for item in extracted_facts:
        add_fact(db_path, item.category, item.key, item.value, source="upload")

    insert_chunk_sql = (
        "INSERT INTO context_chunks(id, tenant_id, document_id, chunk_index, text) VALUES (?,?,?,?,?)"
    )
    # NOTE(review): the document and fact writes above go through store
    # helpers and are presumably committed separately from this block, so a
    # failure here may leave a document without chunks -- confirm.
    with get_conn(db_path) as conn:
        for position, piece in enumerate(pieces):
            # Each chunk gets a fresh random UUID as its primary identifier.
            row = (str(uuid.uuid4()), tenant, record.id, position, piece)
            conn.execute(insert_chunk_sql, row)
        # Single commit once all chunk rows have been issued.
        conn.commit()

    summary: dict[str, Any] = {
        "document_id": record.id,
        "doc_type": kind,
        "facts_written": len(extracted_facts),
        "chunks_written": len(pieces),
    }
    return summary