turnstone/app/context/store.py
pyr0ball e543ab70f7 feat: dual-backend SQLite/Postgres + multi-tenant source namespacing
- Add app/db/ abstraction layer: Backend enum, DbConn wrapper,
  dialect helper (q() for ? vs %s paramstyle), get_conn(), tenant_id()
- Auto-detect backend from DATABASE_URL; SQLite remains default when
  unset — no config change for local deployments
- Add tenant_id column to all three logical DBs (main, context, incidents);
  idempotent ALTER TABLE migration runs before schema scripts on existing DBs
- All INSERTs inject tenant_id; SELECTs use (tenant_id = ? OR tenant_id = '')
  for backward compat with pre-namespacing rows
- Add docker-compose.yml with named volume turnstone_pgdata (survives rebuilds)
  and optional external Postgres support via DATABASE_URL override
- Add scripts/migrate_sqlite_to_postgres.py — one-shot idempotent migration
  for existing SQLite data; ON CONFLICT DO NOTHING for safe re-runs
- Fix SSH glean path in pipeline.py to use ensure_schema + get_conn
  (was still using raw sqlite3.connect + old _SCHEMA without tenant_id)
- Fix FTS5 JOIN ambiguity: qualify repeat_count as f.repeat_count in search
- Update all tests to use ensure_*_schema fixtures; add row_factory where needed
- 394/394 tests passing

Closes: #42
Closes: #50
2026-06-08 08:37:54 -07:00

135 lines
4 KiB
Python

"""Context fact and document CRUD — MIT licensed."""
from __future__ import annotations
import uuid
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from app.db import get_conn, resolve_tenant_id
@dataclass(frozen=True)
class ContextFact:
    """Immutable value object describing one row of ``context_facts``.

    Instances are frozen, so a fact cannot be modified after construction;
    build a new instance instead.
    """

    id: str  # unique identifier; add_fact generates a UUID4 string
    category: str  # grouping label; list_facts can filter on it
    key: str  # name of the fact
    value: str  # content of the fact
    source: str | None  # optional provenance; None when not supplied
    created_at: str  # timestamp string; add_fact writes UTC ISO-8601
@dataclass(frozen=True)
class ContextDocument:
    """Immutable value object describing one row of ``context_documents``.

    Instances are frozen, so a document record cannot be modified after
    construction; build a new instance instead.
    """

    id: str  # unique identifier; add_document generates a UUID4 string
    filename: str  # name of the uploaded file
    doc_type: str  # caller-supplied document type label
    full_text: str  # complete text content of the document
    file_size: int | None  # optional size value; None when not supplied
    uploaded_at: str  # timestamp string; add_document writes UTC ISO-8601
def add_fact(db_path: Path, category: str, key: str, value: str, source: str | None = None) -> ContextFact:
    """Create a context fact and store it under the current tenant.

    Args:
        db_path: Database location passed through to ``get_conn``.
        category: Grouping label for the fact.
        key: Name of the fact.
        value: Content of the fact.
        source: Optional provenance string; defaults to ``None``.

    Returns:
        The ``ContextFact`` that was inserted, carrying a newly generated
        UUID4 id and a UTC ISO-8601 ``created_at`` timestamp.
    """
    # Resolve the tenant first so the row is namespaced on insert.
    tenant = resolve_tenant_id()

    record = ContextFact(
        id=str(uuid.uuid4()),
        category=category,
        key=key,
        value=value,
        source=source,
        created_at=datetime.now(timezone.utc).isoformat(),
    )

    insert_sql = (
        "INSERT INTO context_facts(id, tenant_id, category, key, value, source, created_at) VALUES (?,?,?,?,?,?,?)"
    )
    # Parameter order must match the column list in insert_sql.
    params = (
        record.id,
        tenant,
        record.category,
        record.key,
        record.value,
        record.source,
        record.created_at,
    )

    with get_conn(db_path) as conn:
        conn.execute(insert_sql, params)
        conn.commit()
    return record
def list_facts(db_path: Path, category: str | None = None) -> list[ContextFact]:
    """Return the context facts visible to the current tenant.

    Rows match when their ``tenant_id`` equals the resolved tenant or is the
    empty string (rows written before tenant namespacing existed).

    Args:
        db_path: Database location passed through to ``get_conn``.
        category: When truthy, only facts in this category are returned,
            ordered by ``created_at``. When ``None`` or empty, all visible
            facts are returned, ordered by ``category`` then ``created_at``.

    Returns:
        A list of ``ContextFact`` objects in the query's order.
    """
    tenant = resolve_tenant_id()

    # Choose the statement and its parameters up front, then run one query.
    if category:
        query = "SELECT * FROM context_facts WHERE category=? AND (tenant_id=? OR tenant_id='') ORDER BY created_at"
        params = (category, tenant)
    else:
        query = "SELECT * FROM context_facts WHERE (tenant_id=? OR tenant_id='') ORDER BY category, created_at"
        params = (tenant,)

    # Only these columns are copied into the dataclass; tenant_id is dropped.
    columns = ("id", "category", "key", "value", "source", "created_at")

    with get_conn(db_path) as conn:
        fetched = conn.execute(query, params).fetchall()
        return [
            ContextFact(**{col: record[col] for col in columns})
            for record in fetched
        ]
def delete_fact(db_path: Path, fact_id: str) -> bool:
    """Delete one context fact by id, if it is visible to the current tenant.

    The row is removed only when its ``tenant_id`` equals the resolved tenant
    or is the empty string (rows written before tenant namespacing existed).

    Args:
        db_path: Database location passed through to ``get_conn``.
        fact_id: The ``id`` of the fact to delete.

    Returns:
        ``True`` when the DELETE affected at least one row, else ``False``.
    """
    tenant = resolve_tenant_id()
    statement = "DELETE FROM context_facts WHERE id=? AND (tenant_id=? OR tenant_id='')"

    with get_conn(db_path) as conn:
        # Capture the affected-row count while the connection context is open.
        affected = conn.execute(statement, (fact_id, tenant)).rowcount
        conn.commit()
    return affected > 0
def add_document(
    db_path: Path,
    filename: str,
    doc_type: str,
    full_text: str,
    file_size: int | None = None,
) -> ContextDocument:
    """Create a context document and store it under the current tenant.

    Args:
        db_path: Database location passed through to ``get_conn``.
        filename: Name of the uploaded file.
        doc_type: Caller-supplied document type label.
        full_text: Complete text content of the document.
        file_size: Optional size value; defaults to ``None``.

    Returns:
        The ``ContextDocument`` that was inserted, carrying a newly generated
        UUID4 id and a UTC ISO-8601 ``uploaded_at`` timestamp.
    """
    # Resolve the tenant first so the row is namespaced on insert.
    tenant = resolve_tenant_id()

    record = ContextDocument(
        id=str(uuid.uuid4()),
        filename=filename,
        doc_type=doc_type,
        full_text=full_text,
        file_size=file_size,
        uploaded_at=datetime.now(timezone.utc).isoformat(),
    )

    insert_sql = (
        "INSERT INTO context_documents(id, tenant_id, filename, doc_type, full_text, file_size, uploaded_at)"
        " VALUES (?,?,?,?,?,?,?)"
    )
    # Parameter order must match the column list in insert_sql.
    params = (
        record.id,
        tenant,
        record.filename,
        record.doc_type,
        record.full_text,
        record.file_size,
        record.uploaded_at,
    )

    with get_conn(db_path) as conn:
        conn.execute(insert_sql, params)
        conn.commit()
    return record
def list_documents(db_path: Path) -> list[ContextDocument]:
    """Return the context documents visible to the current tenant, newest first.

    Rows match when their ``tenant_id`` equals the resolved tenant or is the
    empty string (rows written before tenant namespacing existed).

    Args:
        db_path: Database location passed through to ``get_conn``.

    Returns:
        A list of ``ContextDocument`` objects ordered by ``uploaded_at``
        descending.
    """
    tenant = resolve_tenant_id()
    query = (
        "SELECT id, filename, doc_type, full_text, file_size, uploaded_at"
        " FROM context_documents WHERE (tenant_id=? OR tenant_id='') ORDER BY uploaded_at DESC"
    )
    # Columns copied from each result row into the dataclass, by name.
    columns = ("id", "filename", "doc_type", "full_text", "file_size", "uploaded_at")

    with get_conn(db_path) as conn:
        fetched = conn.execute(query, (tenant,)).fetchall()
        return [
            ContextDocument(**{col: record[col] for col in columns})
            for record in fetched
        ]
def delete_document(db_path: Path, doc_id: str) -> bool:
    """Delete one context document by id, if it is visible to the current tenant.

    The row is removed only when its ``tenant_id`` equals the resolved tenant
    or is the empty string (rows written before tenant namespacing existed).

    Args:
        db_path: Database location passed through to ``get_conn``.
        doc_id: The ``id`` of the document to delete.

    Returns:
        ``True`` when the DELETE affected at least one row, else ``False``.
    """
    tenant = resolve_tenant_id()
    statement = "DELETE FROM context_documents WHERE id=? AND (tenant_id=? OR tenant_id='')"

    with get_conn(db_path) as conn:
        # Capture the affected-row count while the connection context is open.
        affected = conn.execute(statement, (doc_id, tenant)).rowcount
        conn.commit()
    return affected > 0