Watcher, REST endpoints, services (search, incidents, blocklist),
MCP server, context retriever, embedder, glean_scheduler, and
doc_upload all used the default 5-second SQLite busy timeout.
During collect glean write phases, watcher flush threads were hitting
'database is locked' errors when the glean held the write lock longer
than 5 seconds.
All connections now use timeout=30.0, matching the pipeline fix
from commit 6882248. No logic changes.
43 lines
1.3 KiB
Python
43 lines
1.3 KiB
Python
"""Upload adapter: processes file bytes and writes to context store — MIT licensed."""
|
|
from __future__ import annotations
|
|
|
|
import sqlite3
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from app.context.chunker import process_upload
|
|
from app.context.store import add_document, add_fact
|
|
|
|
|
|
def glean_upload(db_path: Path, filename: str, content: bytes) -> dict[str, Any]:
    """Process an uploaded file and write it to the context store.

    The raw bytes are handed to ``process_upload``, which yields a document
    type, a collection of facts, and a collection of text chunks. The
    document is recorded via ``add_document``, each fact via ``add_fact``
    (tagged ``source="upload"``), and every chunk is inserted into the
    ``context_chunks`` table linked to the new document's id.

    Args:
        db_path: Path to the SQLite database file.
        filename: Name of the uploaded file; passed to ``process_upload``
            and stored with the document.
        content: Raw file bytes. Stored as text decoded as UTF-8, with
            undecodable bytes replaced rather than raising.

    Returns:
        A summary dict with keys ``document_id``, ``doc_type``,
        ``facts_written`` and ``chunks_written``.

    Raises:
        sqlite3.Error: If writing the chunks fails (e.g. the database stays
            locked past the busy timeout). The connection is closed before
            the error propagates.
    """
    doc_type, facts, chunks = process_upload(filename, content)

    doc = add_document(
        db_path,
        filename=filename,
        doc_type=doc_type,
        full_text=content.decode("utf-8", errors="replace"),
        file_size=len(content),
    )

    for fact in facts:
        add_fact(db_path, fact.category, fact.key, fact.value, source="upload")

    # 30 s busy timeout instead of sqlite3's 5 s default, so this writer waits
    # for a long-held write lock rather than failing with "database is locked".
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        # One executemany call inserts all chunks; chunk_index preserves the
        # order in which process_upload produced them.
        conn.executemany(
            "INSERT INTO context_chunks(id, document_id, chunk_index, text) VALUES (?,?,?,?)",
            (
                (str(uuid.uuid4()), doc.id, index, chunk_text)
                for index, chunk_text in enumerate(chunks)
            ),
        )
        conn.commit()
    finally:
        # Always release the handle, even when an insert or the commit fails;
        # closing without a commit discards the uncommitted chunk rows.
        conn.close()

    return {
        "document_id": doc.id,
        "doc_type": doc_type,
        "facts_written": len(facts),
        "chunks_written": len(chunks),
    }
|