fix(vector): make VectorMatch.metadata immutable; rename id to entry_id
This commit is contained in:
parent
fe51914902
commit
9492942623
2 changed files with 40 additions and 19 deletions
|
|
@ -8,22 +8,30 @@ from __future__ import annotations
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
from types import MappingProxyType
|
||||||
|
from typing import Any, Mapping
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class VectorMatch:
|
class VectorMatch:
|
||||||
"""A single result from a vector similarity search."""
|
"""A single result from a vector similarity search."""
|
||||||
|
|
||||||
id: str
|
entry_id: str
|
||||||
score: float # lower is better (cosine / L2 distance)
|
score: float # lower is better (L2 / cosine distance)
|
||||||
metadata: dict = field(default_factory=dict)
|
metadata: Mapping[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
def __post_init__(self) -> None:
|
||||||
|
if isinstance(self.metadata, dict):
|
||||||
|
object.__setattr__(self, "metadata", MappingProxyType(self.metadata))
|
||||||
|
|
||||||
|
|
||||||
class VectorStore(ABC):
|
class VectorStore(ABC):
|
||||||
"""Abstract interface for vector storage backends."""
|
"""Abstract interface for vector storage backends."""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def upsert(self, id: str, vector: list[float], metadata: dict) -> None:
|
def upsert(
|
||||||
|
self, entry_id: str, vector: list[float], metadata: dict[str, Any]
|
||||||
|
) -> None:
|
||||||
"""Insert or replace a vector and its metadata."""
|
"""Insert or replace a vector and its metadata."""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|
@ -31,14 +39,17 @@ class VectorStore(ABC):
|
||||||
self,
|
self,
|
||||||
vector: list[float],
|
vector: list[float],
|
||||||
top_k: int = 10,
|
top_k: int = 10,
|
||||||
filter_metadata: dict | None = None,
|
filter_metadata: dict[str, Any] | None = None,
|
||||||
) -> list[VectorMatch]:
|
) -> list[VectorMatch]:
|
||||||
"""Return the top_k nearest vectors. Optional metadata filter applied post-search."""
|
"""Return the top_k nearest vectors. Optional metadata filter applied post-search."""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def delete(self, id: str) -> None:
|
def delete(self, entry_id: str) -> None:
|
||||||
"""Remove a single vector by string ID."""
|
"""Remove a single vector by string ID. No-op if not found."""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def delete_where(self, filter_metadata: dict) -> int:
|
def delete_where(self, filter_metadata: dict[str, Any]) -> int:
|
||||||
"""Remove all vectors whose metadata matches all key-value pairs. Returns count."""
|
"""Remove all vectors whose metadata matches all key-value pairs. Returns count removed.
|
||||||
|
|
||||||
|
Raises ValueError if filter_metadata is empty (would delete entire store).
|
||||||
|
"""
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
# tests/test_vector/test_base.py
|
|
||||||
"""Tests for VectorStore ABC and VectorMatch."""
|
"""Tests for VectorStore ABC and VectorMatch."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import FrozenInstanceError
|
||||||
|
from types import MappingProxyType
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from circuitforge_core.vector.base import VectorMatch, VectorStore
|
from circuitforge_core.vector.base import VectorMatch, VectorStore
|
||||||
|
|
@ -14,8 +16,8 @@ class _ConcreteStore(VectorStore):
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self._data: dict[str, tuple[list[float], dict]] = {}
|
self._data: dict[str, tuple[list[float], dict]] = {}
|
||||||
|
|
||||||
def upsert(self, id: str, vector: list[float], metadata: dict) -> None:
|
def upsert(self, entry_id: str, vector: list[float], metadata: dict) -> None:
|
||||||
self._data[id] = (vector, metadata)
|
self._data[entry_id] = (vector, metadata)
|
||||||
|
|
||||||
def query(
|
def query(
|
||||||
self,
|
self,
|
||||||
|
|
@ -24,7 +26,8 @@ class _ConcreteStore(VectorStore):
|
||||||
filter_metadata: dict | None = None,
|
filter_metadata: dict | None = None,
|
||||||
) -> list[VectorMatch]:
|
) -> list[VectorMatch]:
|
||||||
results = [
|
results = [
|
||||||
VectorMatch(id=k, score=0.0, metadata=v[1]) for k, v in self._data.items()
|
VectorMatch(entry_id=k, score=0.0, metadata=v[1])
|
||||||
|
for k, v in self._data.items()
|
||||||
]
|
]
|
||||||
if filter_metadata:
|
if filter_metadata:
|
||||||
results = [
|
results = [
|
||||||
|
|
@ -34,8 +37,8 @@ class _ConcreteStore(VectorStore):
|
||||||
]
|
]
|
||||||
return results[:top_k]
|
return results[:top_k]
|
||||||
|
|
||||||
def delete(self, id: str) -> None:
|
def delete(self, entry_id: str) -> None:
|
||||||
self._data.pop(id, None)
|
self._data.pop(entry_id, None)
|
||||||
|
|
||||||
def delete_where(self, filter_metadata: dict) -> int:
|
def delete_where(self, filter_metadata: dict) -> int:
|
||||||
to_remove = [
|
to_remove = [
|
||||||
|
|
@ -49,17 +52,24 @@ class _ConcreteStore(VectorStore):
|
||||||
|
|
||||||
|
|
||||||
def test_vector_match_is_frozen():
|
def test_vector_match_is_frozen():
|
||||||
match = VectorMatch(id="a", score=0.1, metadata={})
|
match = VectorMatch(entry_id="a", score=0.1, metadata={})
|
||||||
with pytest.raises(Exception):
|
with pytest.raises(FrozenInstanceError):
|
||||||
match.score = 0.5 # type: ignore[misc]
|
match.score = 0.5 # type: ignore[misc]
|
||||||
|
|
||||||
|
|
||||||
|
def test_vector_match_metadata_is_not_mutable():
|
||||||
|
match = VectorMatch(entry_id="a", score=0.1, metadata={"k": "v"})
|
||||||
|
assert isinstance(match.metadata, MappingProxyType)
|
||||||
|
with pytest.raises(TypeError):
|
||||||
|
match.metadata["k"] = "changed" # type: ignore[index]
|
||||||
|
|
||||||
|
|
||||||
def test_upsert_and_query():
|
def test_upsert_and_query():
|
||||||
store = _ConcreteStore()
|
store = _ConcreteStore()
|
||||||
store.upsert("chunk-1", [0.1, 0.2], {"doc_id": "book-a", "page": 1})
|
store.upsert("chunk-1", [0.1, 0.2], {"doc_id": "book-a", "page": 1})
|
||||||
results = store.query([0.1, 0.2])
|
results = store.query([0.1, 0.2])
|
||||||
assert len(results) == 1
|
assert len(results) == 1
|
||||||
assert results[0].id == "chunk-1"
|
assert results[0].entry_id == "chunk-1"
|
||||||
assert results[0].metadata["page"] == 1
|
assert results[0].metadata["page"] == 1
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -69,7 +79,7 @@ def test_query_filter_metadata():
|
||||||
store.upsert("c2", [0.2], {"doc_id": "book-b"})
|
store.upsert("c2", [0.2], {"doc_id": "book-b"})
|
||||||
results = store.query([0.1], filter_metadata={"doc_id": "book-a"})
|
results = store.query([0.1], filter_metadata={"doc_id": "book-a"})
|
||||||
assert len(results) == 1
|
assert len(results) == 1
|
||||||
assert results[0].id == "c1"
|
assert results[0].entry_id == "c1"
|
||||||
|
|
||||||
|
|
||||||
def test_delete():
|
def test_delete():
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue