1212 get_args ,
1313 TYPE_CHECKING ,
1414 Final ,
15+ Type ,
1516)
1617from copy import deepcopy
1718from typing_extensions import TypeAlias
2021import numpy as np
2122import warnings
2223from typing_extensions import TypedDict , Protocol , runtime_checkable
23- from pydantic import BaseModel , field_validator
24+ from pydantic import BaseModel , field_validator , model_validator
25+ from pydantic_core import PydanticCustomError
2426
2527import chromadb .errors as errors
2628from chromadb .base_types import (
@@ -1493,15 +1495,57 @@ def validate_sparse_embedding_function(
14931495
14941496
14951497# Index Configuration Types for Collection Schema
1498+ def _create_extra_fields_validator (valid_fields : list [str ]) -> Any :
1499+ """Create a model validator that provides helpful error messages for invalid fields."""
1500+
1501+ @model_validator (mode = "before" )
1502+ def validate_extra_fields (cls : Type [BaseModel ], data : Any ) -> Any :
1503+ if isinstance (data , dict ):
1504+ invalid_fields = [k for k in data .keys () if k not in valid_fields ]
1505+ if invalid_fields :
1506+ invalid_fields_str = ", " .join (f"'{ f } '" for f in invalid_fields )
1507+ class_name = cls .__name__
1508+ # Create a clear, actionable error message
1509+ if len (invalid_fields ) == 1 :
1510+ msg = (
1511+ f"'{ invalid_fields [0 ]} ' is not a valid field for { class_name } . "
1512+ )
1513+ else :
1514+ msg = f"Invalid fields for { class_name } : { invalid_fields_str } . "
1515+
1516+ raise PydanticCustomError (
1517+ "invalid_field" ,
1518+ msg ,
1519+ {"invalid_fields" : invalid_fields },
1520+ )
1521+ return data
1522+
1523+ return validate_extra_fields
1524+
1525+
14961526class FtsIndexConfig (BaseModel ):
14971527 """Configuration for Full-Text Search index. No parameters required."""
14981528
1529+ model_config = {"extra" : "forbid" }
1530+
14991531 pass
15001532
15011533
15021534class HnswIndexConfig (BaseModel ):
15031535 """Configuration for HNSW vector index."""
15041536
1537+ _validate_extra_fields = _create_extra_fields_validator (
1538+ [
1539+ "ef_construction" ,
1540+ "max_neighbors" ,
1541+ "ef_search" ,
1542+ "num_threads" ,
1543+ "batch_size" ,
1544+ "sync_threshold" ,
1545+ "resize_factor" ,
1546+ ]
1547+ )
1548+
15051549 ef_construction : Optional [int ] = None
15061550 max_neighbors : Optional [int ] = None
15071551 ef_search : Optional [int ] = None
@@ -1514,6 +1558,27 @@ class HnswIndexConfig(BaseModel):
15141558class SpannIndexConfig (BaseModel ):
15151559 """Configuration for SPANN vector index."""
15161560
1561+ _validate_extra_fields = _create_extra_fields_validator (
1562+ [
1563+ "search_nprobe" ,
1564+ "search_rng_factor" ,
1565+ "search_rng_epsilon" ,
1566+ "nreplica_count" ,
1567+ "write_nprobe" ,
1568+ "write_rng_factor" ,
1569+ "write_rng_epsilon" ,
1570+ "split_threshold" ,
1571+ "num_samples_kmeans" ,
1572+ "initial_lambda" ,
1573+ "reassign_neighbor_count" ,
1574+ "merge_threshold" ,
1575+ "num_centers_to_merge_to" ,
1576+ "ef_construction" ,
1577+ "ef_search" ,
1578+ "max_neighbors" ,
1579+ ]
1580+ )
1581+
15171582 search_nprobe : Optional [int ] = None
15181583 write_nprobe : Optional [int ] = None
15191584 ef_construction : Optional [int ] = None
@@ -1527,7 +1592,8 @@ class SpannIndexConfig(BaseModel):
15271592class VectorIndexConfig (BaseModel ):
15281593 """Configuration for vector index with space, embedding function, and algorithm config."""
15291594
1530- model_config = {"arbitrary_types_allowed" : True }
1595+ model_config = {"arbitrary_types_allowed" : True , "extra" : "forbid" }
1596+
15311597 space : Optional [Space ] = None
15321598 embedding_function : Optional [Any ] = DefaultEmbeddingFunction ()
15331599 source_key : Optional [
@@ -1577,7 +1643,8 @@ def validate_embedding_function_field(cls, v: Any) -> Any:
15771643class SparseVectorIndexConfig (BaseModel ):
15781644 """Configuration for sparse vector index."""
15791645
1580- model_config = {"arbitrary_types_allowed" : True }
1646+ model_config = {"arbitrary_types_allowed" : True , "extra" : "forbid" }
1647+
15811648 # TODO(Sanket): Change this to the appropriate sparse ef and use a default here.
15821649 embedding_function : Optional [Any ] = None
15831650 source_key : Optional [
@@ -1628,24 +1695,32 @@ def validate_embedding_function_field(cls, v: Any) -> Any:
16281695class StringInvertedIndexConfig (BaseModel ):
16291696 """Configuration for string inverted index."""
16301697
1698+ model_config = {"extra" : "forbid" }
1699+
16311700 pass
16321701
16331702
16341703class IntInvertedIndexConfig (BaseModel ):
16351704 """Configuration for integer inverted index."""
16361705
1706+ model_config = {"extra" : "forbid" }
1707+
16371708 pass
16381709
16391710
16401711class FloatInvertedIndexConfig (BaseModel ):
16411712 """Configuration for float inverted index."""
16421713
1714+ model_config = {"extra" : "forbid" }
1715+
16431716 pass
16441717
16451718
16461719class BoolInvertedIndexConfig (BaseModel ):
16471720 """Configuration for boolean inverted index."""
16481721
1722+ model_config = {"extra" : "forbid" }
1723+
16491724 pass
16501725
16511726
0 commit comments