turnstone/app/context/store.py
pyr0ball aafb4e2cad fix: separate context KB into own SQLite file to eliminate write-lock contention
context_facts, context_documents, and context_chunks now live in
turnstone-context.db (sibling of turnstone.db).  The glean scheduler
held write locks on the main DB long enough to cause 5-second timeout
failures on context fact inserts; separate files have independent WAL
write locks so they never contend.

Changes:
- pipeline.py: extract _CONTEXT_SCHEMA + ensure_context_schema()
- rest.py: CONTEXT_DB_PATH (TURNSTONE_CONTEXT_DB env var, defaults to
  sibling file); init via ensure_context_schema(); all context routes
  pass CONTEXT_DB_PATH; diagnose_stream receives context_db_path kwarg
- diagnose/__init__.py: diagnose_stream() accepts context_db_path
  (falls back to db_path for backward compat); retrieve_context uses it
- store.py: sqlite3.connect() timeout=30.0 — Python driver retry loop
  is independent of PRAGMA busy_timeout; needed for any remaining
  contention during test or single-file deployments

Closes: #42
2026-05-25 21:19:32 -07:00

137 lines
3.9 KiB
Python

"""Context fact and document CRUD — MIT licensed."""
from __future__ import annotations
import sqlite3
import uuid
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
@dataclass(frozen=True)
class ContextFact:
    """Immutable record mirroring one row of the ``context_facts`` table."""

    # Primary key; add_fact() fills this with a random UUID4 string.
    id: str
    # Grouping label; list_facts() can filter and order by it.
    category: str
    # Name of the fact.
    key: str
    # Content stored for the fact.
    value: str
    # Optional origin of the fact; None when the caller supplied none.
    source: str | None
    # Creation timestamp; add_fact() writes a UTC ISO-8601 string here.
    created_at: str
@dataclass(frozen=True)
class ContextDocument:
    """Immutable record mirroring one row of the ``context_documents`` table."""

    # Primary key; add_document() fills this with a random UUID4 string.
    id: str
    # Name of the uploaded file.
    filename: str
    # Caller-supplied document type label.
    doc_type: str
    # Complete text content of the document.
    full_text: str
    # Optional file size; None when the caller did not provide one.
    file_size: int | None
    # Upload timestamp; add_document() writes a UTC ISO-8601 string here.
    uploaded_at: str
def _connect(db_path: Path) -> sqlite3.Connection:
    """Open a SQLite connection configured for the context store.

    The connection uses WAL journaling, enforces foreign keys, and yields
    ``sqlite3.Row`` rows so columns can be read by name.

    Args:
        db_path: Location of the SQLite database file.

    Returns:
        An open connection; the caller is responsible for closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or configured.
    """
    # timeout=30: wait up to 30 s (the driver default is 5 s) when another
    # writer (e.g. the glean collector) holds the write lock.  The sqlite3
    # module applies this value as the connection's SQLite busy timeout, the
    # same setting PRAGMA busy_timeout adjusts, so no separate PRAGMA is
    # issued here.
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
    except sqlite3.Error:
        # Do not leak the handle when setup fails (e.g. the WAL switch cannot
        # obtain its lock before the timeout expires).
        conn.close()
        raise
    conn.row_factory = sqlite3.Row
    return conn
def add_fact(db_path: Path, category: str, key: str, value: str, source: str | None = None) -> ContextFact:
    """Insert a new fact into ``context_facts`` and return it.

    Args:
        db_path: Location of the SQLite database file.
        category: Category the fact is stored under.
        key: Name of the fact.
        value: Content of the fact.
        source: Optional origin of the fact.

    Returns:
        The stored fact, carrying a freshly generated UUID4 id and a UTC
        ISO-8601 ``created_at`` timestamp.

    Raises:
        sqlite3.Error: If the database cannot be opened or the insert fails.
    """
    fact = ContextFact(
        id=str(uuid.uuid4()),
        category=category,
        key=key,
        value=value,
        source=source,
        created_at=datetime.now(timezone.utc).isoformat(),
    )
    conn = _connect(db_path)
    try:
        # The connection context commits on success and rolls back on error.
        with conn:
            conn.execute(
                "INSERT INTO context_facts(id, category, key, value, source, created_at) VALUES (?,?,?,?,?,?)",
                (fact.id, fact.category, fact.key, fact.value, fact.source, fact.created_at),
            )
    finally:
        # Always release the handle, even when the INSERT raises.
        conn.close()
    return fact
def list_facts(db_path: Path, category: str | None = None) -> list[ContextFact]:
    """Return stored facts, optionally restricted to one category.

    Args:
        db_path: Location of the SQLite database file.
        category: When truthy, only facts in this category are returned,
            ordered by ``created_at``.  ``None`` or an empty string returns
            every fact ordered by category, then ``created_at``.

    Returns:
        The matching facts; an empty list when there are none.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails.
    """
    if category:
        sql = "SELECT * FROM context_facts WHERE category=? ORDER BY created_at"
        params = (category,)
    else:
        sql = "SELECT * FROM context_facts ORDER BY category, created_at"
        params = ()
    conn = _connect(db_path)
    try:
        rows = conn.execute(sql, params).fetchall()
    finally:
        # Always release the handle, even when the SELECT raises.
        conn.close()
    return [
        ContextFact(
            id=row["id"],
            category=row["category"],
            key=row["key"],
            value=row["value"],
            source=row["source"],
            created_at=row["created_at"],
        )
        for row in rows
    ]
def delete_fact(db_path: Path, fact_id: str) -> bool:
    """Delete the fact with the given id.

    Args:
        db_path: Location of the SQLite database file.
        fact_id: Value of the ``id`` column of the fact to remove.

    Returns:
        True if a row was deleted, False if no fact had that id.

    Raises:
        sqlite3.Error: If the database cannot be opened or the delete fails.
    """
    conn = _connect(db_path)
    try:
        # The connection context commits on success and rolls back on error.
        with conn:
            deleted = conn.execute(
                "DELETE FROM context_facts WHERE id=?", (fact_id,)
            ).rowcount
    finally:
        # Always release the handle, even when the DELETE raises.
        conn.close()
    return deleted > 0
def add_document(
    db_path: Path,
    filename: str,
    doc_type: str,
    full_text: str,
    file_size: int | None = None,
) -> ContextDocument:
    """Insert a new document into ``context_documents`` and return it.

    Args:
        db_path: Location of the SQLite database file.
        filename: Name of the uploaded file.
        doc_type: Document type label to store.
        full_text: Complete text content of the document.
        file_size: Optional file size to store.

    Returns:
        The stored document, carrying a freshly generated UUID4 id and a UTC
        ISO-8601 ``uploaded_at`` timestamp.

    Raises:
        sqlite3.Error: If the database cannot be opened or the insert fails.
    """
    doc = ContextDocument(
        id=str(uuid.uuid4()),
        filename=filename,
        doc_type=doc_type,
        full_text=full_text,
        file_size=file_size,
        uploaded_at=datetime.now(timezone.utc).isoformat(),
    )
    conn = _connect(db_path)
    try:
        # The connection context commits on success and rolls back on error.
        with conn:
            conn.execute(
                "INSERT INTO context_documents(id, filename, doc_type, full_text, file_size, uploaded_at)"
                " VALUES (?,?,?,?,?,?)",
                (doc.id, doc.filename, doc.doc_type, doc.full_text, doc.file_size, doc.uploaded_at),
            )
    finally:
        # Always release the handle, even when the INSERT raises.
        conn.close()
    return doc
def list_documents(db_path: Path) -> list[ContextDocument]:
    """Return every stored document, most recently uploaded first.

    Args:
        db_path: Location of the SQLite database file.

    Returns:
        All documents ordered by ``uploaded_at`` descending; an empty list
        when there are none.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails.
    """
    conn = _connect(db_path)
    try:
        rows = conn.execute(
            "SELECT id, filename, doc_type, full_text, file_size, uploaded_at"
            " FROM context_documents ORDER BY uploaded_at DESC"
        ).fetchall()
    finally:
        # Always release the handle, even when the SELECT raises.
        conn.close()
    return [
        ContextDocument(
            id=row["id"],
            filename=row["filename"],
            doc_type=row["doc_type"],
            full_text=row["full_text"],
            file_size=row["file_size"],
            uploaded_at=row["uploaded_at"],
        )
        for row in rows
    ]
def delete_document(db_path: Path, doc_id: str) -> bool:
    """Delete the document with the given id.

    Args:
        db_path: Location of the SQLite database file.
        doc_id: Value of the ``id`` column of the document to remove.

    Returns:
        True if a row was deleted, False if no document had that id.

    Raises:
        sqlite3.Error: If the database cannot be opened or the delete fails.
    """
    conn = _connect(db_path)
    try:
        # The connection context commits on success and rolls back on error.
        with conn:
            deleted = conn.execute(
                "DELETE FROM context_documents WHERE id=?", (doc_id,)
            ).rowcount
    finally:
        # Always release the handle, even when the DELETE raises.
        conn.close()
    return deleted > 0