diff --git a/chromadb/api/segment.py b/chromadb/api/segment.py index 5bb286bd8be..2fa2063776c 100644 --- a/chromadb/api/segment.py +++ b/chromadb/api/segment.py @@ -400,11 +400,10 @@ def delete_collection( ) if existing: + self._manager.delete_segments(existing[0].id) self._sysdb.delete_collection( existing[0].id, tenant=tenant, database=database ) - # NOTE: delete_segments is a no-op in distributed mode - self._manager.delete_segments(existing[0].id) else: raise ValueError(f"Collection {name} does not exist.") diff --git a/chromadb/segment/__init__.py b/chromadb/segment/__init__.py index d1e440f17c5..bd5fdc37151 100644 --- a/chromadb/segment/__init__.py +++ b/chromadb/segment/__init__.py @@ -104,7 +104,9 @@ class SegmentManager(Component): segments as required""" @abstractmethod - def prepare_segments_for_new_collection(self, collection: Collection) -> Sequence[Segment]: + def prepare_segments_for_new_collection( + self, collection: Collection + ) -> Sequence[Segment]: """Return the segments required for a new collection. Returns only segment data, does not persist to the SysDB""" pass diff --git a/chromadb/segment/impl/manager/local.py b/chromadb/segment/impl/manager/local.py index 06b152cc7f3..6073ea1b534 100644 --- a/chromadb/segment/impl/manager/local.py +++ b/chromadb/segment/impl/manager/local.py @@ -137,7 +137,9 @@ def reset_state(self) -> None: OpenTelemetryGranularity.OPERATION_AND_SEGMENT, ) @override - def prepare_segments_for_new_collection(self, collection: Collection) -> Sequence[Segment]: + def prepare_segments_for_new_collection( + self, collection: Collection + ) -> Sequence[Segment]: vector_segment = _segment( self._vector_segment_type, SegmentScope.VECTOR, collection ) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 18d9eb07729..64fd10f1d8d 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -1,7 +1,11 @@ import gc import math +import os.path from uuid import UUID +from contextlib import contextmanager +from chromadb.api.segment import SegmentAPI +from chromadb.db.system import SysDB from chromadb.ingest.impl.utils import create_topic_name from chromadb.config import System @@ -9,12 +13,14 @@ from chromadb.db.impl.sqlite import SqliteDB from time import sleep import psutil + +from chromadb.segment import SegmentType from chromadb.test.property.strategies import NormalizedRecordSet, RecordSet from typing import Callable, Optional, Tuple, Union, List, TypeVar, cast, Any, Dict from typing_extensions import Literal import numpy as np import numpy.typing as npt -from chromadb.api import types +from chromadb.api import types, ClientAPI from chromadb.api.models.Collection import Collection from hypothesis import note from hypothesis.errors import InvalidArgument @@ -482,3 +488,53 @@ def log_size_for_collections_match_expected( else: assert _total_embedding_queue_log_size(sqlite) == 0 + + +@contextmanager +def collection_deleted(client: ClientAPI, collection_name: str): + # Invariant checks before deletion + assert collection_name in client.list_collections() + collection = client.get_collection(collection_name) + segments = [] + if isinstance(client._server, SegmentAPI): # type: ignore + sysdb: SysDB = client._server._sysdb # type: ignore + segments = sysdb.get_segments(collection=collection.id) + segment_types = {} + should_have_hnsw = False + for segment in segments: + segment_types[segment["type"]] = True + if segment["type"] == SegmentType.HNSW_LOCAL_PERSISTED.value: + sync_threshold = ( + collection.metadata["hnsw:sync_threshold"] + if collection.metadata is not None + and "hnsw:sync_threshold" in collection.metadata + else 1000 + ) + if ( + collection.count() > sync_threshold + ): # we only check if vector segment dir exists if we've synced at least once + should_have_hnsw = True + assert os.path.exists( + os.path.join( + client.get_settings().persist_directory, str(segment["id"]) + ) + ) + if should_have_hnsw: + assert segment_types[SegmentType.HNSW_LOCAL_PERSISTED.value] + assert segment_types[SegmentType.SQLITE.value] + + yield + + # Invariant checks after deletion + assert collection_name not in client.list_collections() + if len(segments) > 0: + sysdb: SysDB = client._server._sysdb # type: ignore + segments_after = sysdb.get_segments(collection=collection.id) + assert len(segments_after) == 0 + for segment in segments: + if segment["type"] == SegmentType.HNSW_LOCAL_PERSISTED.value: + assert not os.path.exists( + os.path.join( + client.get_settings().persist_directory, str(segment["id"]) + ) + ) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index c14574b382a..ad46b15692c 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -15,6 +15,7 @@ run_state_machine_as_test, MultipleResults, ) +import chromadb.test.property.invariants as invariants from typing import Any, Dict, Mapping, Optional import numpy from chromadb.test.property.strategies import hashing_embedding_function @@ -76,8 +77,9 @@ def get_coll(self, coll: strategies.ExternalCollection) -> None: @rule(coll=consumes(collections)) def delete_coll(self, coll: strategies.ExternalCollection) -> None: if coll.name in self.model: - self.client.delete_collection(name=coll.name) - self.delete_from_model(coll.name) + with invariants.collection_deleted(self.client, coll.name): + self.client.delete_collection(name=coll.name) + self.delete_from_model(coll.name) else: with pytest.raises(Exception): self.client.delete_collection(name=coll.name)