fix(vector): make VectorMatch.metadata immutable; rename id to entry_id

This commit is contained in:
pyr0ball 2026-05-04 11:46:24 -07:00
parent fe51914902
commit 9492942623
2 changed files with 40 additions and 19 deletions

View file

@ -8,22 +8,30 @@ from __future__ import annotations
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass, field from dataclasses import dataclass, field
from types import MappingProxyType
from typing import Any, Mapping
@dataclass(frozen=True)
class VectorMatch:
    """A single result from a vector similarity search.

    The dataclass is frozen, so fields cannot be reassigned after
    construction. ``metadata`` is additionally exposed as a read-only
    ``MappingProxyType`` over a private shallow copy of the mapping that was
    passed in, so neither the holder of the match nor the holder of the
    original mapping can change what the match reports.

    Note: the copy is shallow. Mutable values stored *inside* the metadata
    (lists, nested dicts) are shared with the caller and remain mutable.
    """

    # Identifier of the matched entry.
    entry_id: str
    # Distance to the query vector; lower is better (L2 / cosine distance).
    score: float
    # Replaced by a read-only proxy in __post_init__.
    metadata: Mapping[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Swap ``metadata`` for a read-only view over a private copy.

        Wrapping the caller's dict directly would leave the view aliased to
        it: later mutation of that dict would show through the proxy. Copying
        first detaches the match from its source. Any ``Mapping`` is handled,
        including an existing ``MappingProxyType`` that may itself wrap a
        dict the caller still holds. Non-mapping values are left untouched.
        """
        if isinstance(self.metadata, Mapping):
            # frozen=True blocks normal attribute assignment, so bypass the
            # generated __setattr__ via object.__setattr__.
            object.__setattr__(
                self, "metadata", MappingProxyType(dict(self.metadata))
            )
class VectorStore(ABC): class VectorStore(ABC):
"""Abstract interface for vector storage backends.""" """Abstract interface for vector storage backends."""
@abstractmethod @abstractmethod
def upsert(self, id: str, vector: list[float], metadata: dict) -> None: def upsert(
self, entry_id: str, vector: list[float], metadata: dict[str, Any]
) -> None:
"""Insert or replace a vector and its metadata.""" """Insert or replace a vector and its metadata."""
@abstractmethod @abstractmethod
@ -31,14 +39,17 @@ class VectorStore(ABC):
self, self,
vector: list[float], vector: list[float],
top_k: int = 10, top_k: int = 10,
filter_metadata: dict | None = None, filter_metadata: dict[str, Any] | None = None,
) -> list[VectorMatch]: ) -> list[VectorMatch]:
"""Return the top_k nearest vectors. Optional metadata filter applied post-search.""" """Return the top_k nearest vectors. Optional metadata filter applied post-search."""
@abstractmethod @abstractmethod
def delete(self, id: str) -> None: def delete(self, entry_id: str) -> None:
"""Remove a single vector by string ID.""" """Remove a single vector by string ID. No-op if not found."""
@abstractmethod @abstractmethod
def delete_where(self, filter_metadata: dict) -> int: def delete_where(self, filter_metadata: dict[str, Any]) -> int:
"""Remove all vectors whose metadata matches all key-value pairs. Returns count.""" """Remove all vectors whose metadata matches all key-value pairs. Returns count removed.
Raises ValueError if filter_metadata is empty (would delete entire store).
"""

View file

@ -1,8 +1,10 @@
# tests/test_vector/test_base.py
"""Tests for VectorStore ABC and VectorMatch.""" """Tests for VectorStore ABC and VectorMatch."""
from __future__ import annotations from __future__ import annotations
from dataclasses import FrozenInstanceError
from types import MappingProxyType
import pytest import pytest
from circuitforge_core.vector.base import VectorMatch, VectorStore from circuitforge_core.vector.base import VectorMatch, VectorStore
@ -14,8 +16,8 @@ class _ConcreteStore(VectorStore):
def __init__(self) -> None: def __init__(self) -> None:
self._data: dict[str, tuple[list[float], dict]] = {} self._data: dict[str, tuple[list[float], dict]] = {}
def upsert(self, id: str, vector: list[float], metadata: dict) -> None: def upsert(self, entry_id: str, vector: list[float], metadata: dict) -> None:
self._data[id] = (vector, metadata) self._data[entry_id] = (vector, metadata)
def query( def query(
self, self,
@ -24,7 +26,8 @@ class _ConcreteStore(VectorStore):
filter_metadata: dict | None = None, filter_metadata: dict | None = None,
) -> list[VectorMatch]: ) -> list[VectorMatch]:
results = [ results = [
VectorMatch(id=k, score=0.0, metadata=v[1]) for k, v in self._data.items() VectorMatch(entry_id=k, score=0.0, metadata=v[1])
for k, v in self._data.items()
] ]
if filter_metadata: if filter_metadata:
results = [ results = [
@ -34,8 +37,8 @@ class _ConcreteStore(VectorStore):
] ]
return results[:top_k] return results[:top_k]
def delete(self, id: str) -> None: def delete(self, entry_id: str) -> None:
self._data.pop(id, None) self._data.pop(entry_id, None)
def delete_where(self, filter_metadata: dict) -> int: def delete_where(self, filter_metadata: dict) -> int:
to_remove = [ to_remove = [
@ -49,17 +52,24 @@ class _ConcreteStore(VectorStore):
def test_vector_match_is_frozen():
    """Reassigning a field on a VectorMatch must raise FrozenInstanceError."""
    frozen_match = VectorMatch(entry_id="a", score=0.1, metadata={})
    with pytest.raises(FrozenInstanceError):
        # Plain attribute assignment is what the frozen dataclass rejects.
        frozen_match.score = 0.5  # type: ignore[misc]
def test_vector_match_metadata_is_not_mutable():
    """Metadata is exposed as a MappingProxyType and rejects item assignment."""
    result = VectorMatch(entry_id="a", score=0.1, metadata={"k": "v"})
    exposed = result.metadata
    assert isinstance(exposed, MappingProxyType)
    with pytest.raises(TypeError):
        # Look the attribute up again so the write targets the match itself.
        result.metadata["k"] = "changed"  # type: ignore[index]
def test_upsert_and_query():
    """A single upserted entry is returned by query with its id and metadata."""
    backend = _ConcreteStore()
    backend.upsert("chunk-1", [0.1, 0.2], {"doc_id": "book-a", "page": 1})

    matches = backend.query([0.1, 0.2])

    assert len(matches) == 1
    only_match = matches[0]
    assert only_match.entry_id == "chunk-1"
    assert only_match.metadata["page"] == 1
@ -69,7 +79,7 @@ def test_query_filter_metadata():
store.upsert("c2", [0.2], {"doc_id": "book-b"}) store.upsert("c2", [0.2], {"doc_id": "book-b"})
results = store.query([0.1], filter_metadata={"doc_id": "book-a"}) results = store.query([0.1], filter_metadata={"doc_id": "book-a"})
assert len(results) == 1 assert len(results) == 1
assert results[0].id == "c1" assert results[0].entry_id == "c1"
def test_delete(): def test_delete():