"""Ollama embedding client with sqlite-vec storage — BSL licensed.""" from __future__ import annotations import logging import sqlite3 import struct from pathlib import Path import httpx logger = logging.getLogger(__name__) EMBEDDING_AVAILABLE: bool = False try: import sqlite_vec # type: ignore[import] # noqa: F401 EMBEDDING_AVAILABLE = True logger.debug("sqlite-vec loaded — embedding pipeline enabled") except ImportError: logger.debug("sqlite-vec not available — embedding pipeline disabled") def embed_chunks( db_path: Path, document_id: str, llm_url: str, model: str = "nomic-embed-text", timeout: float = 60.0, ) -> int: """Embed all unembedded chunks for a document. Returns count embedded. No-op when EMBEDDING_AVAILABLE is False.""" if not EMBEDDING_AVAILABLE: return 0 conn = sqlite3.connect(str(db_path)) conn.execute("PRAGMA journal_mode=WAL") conn.row_factory = sqlite3.Row rows = conn.execute( "SELECT id, text FROM context_chunks WHERE document_id=? AND embedding IS NULL", (document_id,), ).fetchall() count = 0 for row in rows: try: resp = httpx.post( f"{llm_url.rstrip('/')}/api/embeddings", json={"model": model, "prompt": row["text"]}, timeout=timeout, ) resp.raise_for_status() vector: list[float] = resp.json().get("embedding") or [] if vector: blob = struct.pack(f"{len(vector)}f", *vector) conn.execute( "UPDATE context_chunks SET embedding=? WHERE id=?", (blob, row["id"]), ) count += 1 except Exception as exc: logger.warning("Embedding chunk %s failed: %s", row["id"], exc) conn.commit() conn.close() return count