
Commit 20c69bd

Upgrade to Pydantic V2 models (#281)
This PR migrates RedisVL to native `pydantic ^2.0.0` models, validators, and idioms. The change improves the performance of model construction and validation, and adopts Pydantic v2 best practices for validators and serialization where possible. Because RedisVL already required pydantic v2 and had been relying only on its `v1` compatibility shim, the upgrade should not impact end users.
1 parent c54880a commit 20c69bd

33 files changed: +406 −294 lines
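The migration applied throughout this diff follows a small set of mechanical substitutions: imports move from the `pydantic.v1` shim to native `pydantic`, `@root_validator(pre=True)` becomes `@model_validator(mode="before")`, `@validator` becomes `@field_validator` (plus an explicit `@classmethod`), and `.dict()` becomes `.model_dump()`. A minimal sketch of the pattern on a hypothetical `Entry` model (not a class from this repo):

```python
from typing import Any, Dict, Optional

from pydantic import BaseModel, Field, field_validator, model_validator


class Entry(BaseModel):
    # Hypothetical model illustrating the v1 -> v2 validator migration.
    entry_id: Optional[str] = Field(default=None)
    prompt: str
    metadata: Optional[Dict[str, Any]] = Field(default=None)

    # v1: @root_validator(pre=True)
    @model_validator(mode="before")
    @classmethod
    def generate_id(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # Runs on the raw input dict before field validation.
        if not values.get("entry_id"):
            values["entry_id"] = str(hash(values["prompt"]))  # stand-in for hashify()
        return values

    # v1: @validator("metadata")
    @field_validator("metadata")
    @classmethod
    def non_empty_metadata(cls, v):
        if v is not None and not isinstance(v, dict):
            raise TypeError("Metadata must be a dictionary.")
        return v


entry = Entry(prompt="hello")
# v1: entry.dict(exclude_none=True)
print(entry.model_dump(exclude_none=True))
```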

docs/user_guide/01_getting_started.ipynb (+118 −44)
@@ -209,9 +209,20 @@
  },
  {
   "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 4,
   "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "data": {
+     "text/plain": [
+      "<redisvl.index.index.SearchIndex at 0x10faca900>"
+     ]
+    },
+    "execution_count": 4,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
   "source": [
    "from redis import Redis\n",
    "\n",
@@ -238,7 +249,7 @@
    {
     "data": {
      "text/plain": [
-      "<redisvl.index.index.SearchIndex at 0x7f8670a51190>"
+      "<redisvl.index.index.SearchIndex at 0x10faca900>"
      ]
     },
     "execution_count": 5,
@@ -293,8 +304,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "\u001b[32m11:53:23\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n",
-     "\u001b[32m11:53:23\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. user_simple\n"
+     "\u001b[32m11:50:15\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n",
+     "\u001b[32m11:50:15\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. user_simple\n"
     ]
    }
   ],
@@ -320,15 +331,15 @@
     "│ user_simple │ HASH │ ['user_simple_docs'] │ [] │ 0 │\n",
     "╰──────────────┴────────────────┴──────────────────────┴─────────────────┴────────────╯\n",
     "Index Fields:\n",
-     "╭────────────────┬────────────────┬─────────┬────────────────┬────────────────╮\n",
-     "│ Name │ Attribute │ Type │ Field Option │ Option Value │\n",
-     "├────────────────┼────────────────┼─────────┼────────────────┼────────────────┤\n",
-     "│ user │ user │ TAG │ SEPARATOR │ , │\n",
-     "│ credit_score │ credit_score │ TAG │ SEPARATOR │ , │\n",
-     "│ job │ job │ TEXT │ WEIGHT │ 1 │\n",
-     "│ age │ age │ NUMERIC │ │ │\n",
-     "│ user_embedding │ user_embedding │ VECTOR │ \n",
-     "╰────────────────┴────────────────┴─────────┴────────────────┴────────────────╯\n"
+     "╭────────────────┬────────────────┬─────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬─────────────────┬────────────────\n",
+     "│ Name │ Attribute │ Type │ Field Option │ Option Value │ Field Option │ Option Value │ Field Option │ Option Value │ Field Option │ Option Value │\n",
+     "├────────────────┼────────────────┼─────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼─────────────────┼────────────────\n",
+     "│ user │ user │ TAG │ SEPARATOR │ , │ │ │ │ │ │ │\n",
+     "│ credit_score │ credit_score │ TAG │ SEPARATOR │ , │ │ │ │ │ │ │\n",
+     "│ job │ job │ TEXT │ WEIGHT │ 1 │ │ │ │ │ │ │\n",
+     "│ age │ age │ NUMERIC │ │ │ │ │ │ │ │ │\n",
+     "│ user_embedding │ user_embedding │ VECTOR │ algorithm │ FLAT │ data_type │ FLOAT32 │ dim │ 3 │ distance_metric │ COSINE\n",
+     "╰────────────────┴────────────────┴─────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴─────────────────┴────────────────\n"
    ]
   }
  ],
@@ -354,7 +365,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "['user_simple_docs:d424b73c516442f7919cc11ed3bb1882', 'user_simple_docs:6da16f88342048e79b3500bec5448805', 'user_simple_docs:ef5a590ef85e4d4888fd8ebe79ae1e8c']\n"
+     "['user_simple_docs:01JM2NWFWNH0BNA640MT5DS8BD', 'user_simple_docs:01JM2NWFWNF4S2V4E4HYG25CVA', 'user_simple_docs:01JM2NWFWNBFXJJ4PV9F4KMJSE']\n"
     ]
    }
   ],
@@ -388,7 +399,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "['user_simple_docs:9806a362604f4700b17513cc94fcf10d']\n"
+     "['user_simple_docs:01JM2NWJGYMJ0QTR5YB4MB0BX9']\n"
     ]
    }
   ],
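Note that the generated document key suffixes change from 32-character UUID hex strings to ULIDs, which are timestamp-prefixed and lexicographically sortable. A sketch of producing such a key, assuming the `python-ulid` package; whether this is the exact mechanism RedisVL now uses internally is an assumption here:

```python
# Assumes the python-ulid package (pip install python-ulid).
from ulid import ULID

key = f"user_simple_docs:{ULID()}"
print(key)  # e.g. user_simple_docs:01JM2NWFWNH0BNA640MT5DS8BD
```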
@@ -476,9 +487,50 @@
  },
  {
   "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 13,
   "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "data": {
+     "text/plain": [
+      "{'index': {'name': 'user_simple', 'prefix': 'user_simple_docs'},\n",
+      " 'fields': [{'name': 'user', 'type': 'tag'},\n",
+      " {'name': 'credit_score', 'type': 'tag'},\n",
+      " {'name': 'job', 'type': 'text'},\n",
+      " {'name': 'age', 'type': 'numeric'},\n",
+      " {'name': 'user_embedding',\n",
+      " 'type': 'vector',\n",
+      " 'attrs': {'dims': 3,\n",
+      " 'distance_metric': 'cosine',\n",
+      " 'algorithm': 'flat',\n",
+      " 'datatype': 'float32'}}]}"
+     ]
+    },
+    "execution_count": 13,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
+  "source": [
+   "schema"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 14,
+  "metadata": {},
+  "outputs": [
+   {
+    "data": {
+     "text/plain": [
+      "<redisvl.index.index.AsyncSearchIndex at 0x10facacf0>"
+     ]
+    },
+    "execution_count": 14,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
   "source": [
    "from redisvl.index import AsyncSearchIndex\n",
    "from redis.asyncio import Redis\n",
@@ -491,7 +543,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 14,
+  "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
@@ -532,7 +584,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 15,
+  "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -620,24 +672,24 @@
     "│ Stat Key │ Value │\n",
     "├─────────────────────────────┼─────────────┤\n",
     "│ num_docs │ 4 │\n",
-     "│ num_terms │ 0\n",
+     "│ num_terms │ 4\n",
     "│ max_doc_id │ 4 │\n",
-     "│ num_records │ 20\n",
+     "│ num_records │ 22\n",
     "│ percent_indexed │ 1 │\n",
     "│ hash_indexing_failures │ 0 │\n",
-     "│ number_of_uses │ 2\n",
-     "│ bytes_per_record_avg │ 1 \n",
-     "│ doc_table_size_mb │ 0.00044632 \n",
-     "│ inverted_sz_mb │ 1.90735e-05\n",
-     "│ key_table_size_mb │ 0.000138283\n",
-     "│ offset_bits_per_record_avg │ nan\n",
-     "│ offset_vectors_sz_mb │ 0 \n",
-     "│ offsets_per_term_avg │ 0 \n",
-     "│ records_per_doc_avg │ 5 \n",
+     "│ number_of_uses │ 5\n",
+     "│ bytes_per_record_avg │ 50.9091\n",
+     "│ doc_table_size_mb │ 0.000423431\n",
+     "│ inverted_sz_mb │ 0.00106812 \n",
+     "│ key_table_size_mb │ 0.000165939\n",
+     "│ offset_bits_per_record_avg │ 8 \n",
+     "│ offset_vectors_sz_mb │ 5.72205e-06\n",
+     "│ offsets_per_term_avg │ 0.272727\n",
+     "│ records_per_doc_avg │ 5.5\n",
     "│ sortable_values_size_mb │ 0 │\n",
-     "│ total_indexing_time │ 1.796\n",
-     "│ total_inverted_index_blocks │ 11\n",
-     "│ vector_index_sz_mb │ 0.235603 \n",
+     "│ total_indexing_time │ 0.197\n",
+     "│ total_inverted_index_blocks │ 12\n",
+     "│ vector_index_sz_mb │ 0.0201416\n",
     "╰─────────────────────────────┴─────────────╯\n"
    ]
   }
@@ -657,7 +709,7 @@
  "cell_type": "markdown",
  "metadata": {},
  "source": [
-  "Below we will clean up after our work. First, you can optionally flush all data from Redis associated with the index by\n",
+  "Below we will clean up after our work. First, you can flush all data from Redis associated with the index by\n",
   "using the `.clear()` method. This will leave the secondary index in place for future insertions or updates.\n",
   "\n",
   "But if you want to clean up everything, including the index, just use `.delete()`\n",
@@ -666,31 +718,53 @@
  },
  {
   "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 19,
   "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "data": {
+     "text/plain": [
+      "4"
+     ]
+    },
+    "execution_count": 19,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
   "source": [
-   "# (optionally) clear all data from Redis associated with the index\n",
+   "# Clear all data from Redis associated with the index\n",
    "await index.clear()"
   ]
  },
  {
   "cell_type": "code",
-  "execution_count": null,
+  "execution_count": 20,
   "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "data": {
+     "text/plain": [
+      "True"
+     ]
+    },
+    "execution_count": 20,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
   "source": [
-   "# but the index is still in place\n",
+   "# But the index is still in place\n",
    "await index.exists()"
   ]
  },
  {
   "cell_type": "code",
-  "execution_count": 19,
+  "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
-   "# remove / delete the index in its entirety\n",
+   "# Remove / delete the index in its entirety\n",
    "await index.delete()"
   ]
  }
@@ -711,7 +785,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.11.9"
+  "version": "3.13.2"
  },
 "orig_nbformat": 4
 },
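Condensed, the cleanup flow these cells exercise looks like the sketch below; the return values match the outputs above (`clear()` reports the number of records removed, and `exists()` still returns `True` until `delete()` runs):

```python
# Sketch of the notebook's cleanup flow using RedisVL's async API.
from redisvl.index import AsyncSearchIndex


async def cleanup(index: AsyncSearchIndex) -> int:
    cleared = await index.clear()  # flush documents; the secondary index remains
    assert await index.exists()    # the index definition is still in place
    await index.delete()           # now remove the index itself
    return cleared                 # number of records cleared (4 in the run above)
```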

docs/user_guide/08_semantic_router.ipynb (+2 −2)
@@ -421,7 +421,7 @@
  "source": [
   "router2 = SemanticRouter.from_dict(router.to_dict(), redis_url=\"redis://localhost:6379\")\n",
   "\n",
-  "assert router2 == router"
+  "assert router2.to_dict() == router.to_dict()"
  ]
 },
 {
@@ -449,7 +449,7 @@
  "source": [
   "router3 = SemanticRouter.from_yaml(\"router.yaml\", redis_url=\"redis://localhost:6379\")\n",
   "\n",
-  "assert router3 == router2 == router"
+  "assert router3.to_dict() == router2.to_dict() == router.to_dict()"
  ]
 },
 {
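The assertions now compare `to_dict()` serializations rather than model instances. A plausible motivation, though the diff does not state it: Pydantic v2 generates strict `__eq__` implementations, so two models that are configured identically but hold per-instance runtime state (such as separate Redis connections) never compare equal. A minimal illustration with a hypothetical model:

```python
from pydantic import BaseModel, ConfigDict


class Router(BaseModel):
    # Hypothetical stand-in for a model carrying runtime-only state.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: str
    client: object  # e.g. a Redis connection; differs per instance


a = Router(name="r", client=object())
b = Router(name="r", client=object())

assert a != b  # strict v2 equality also compares the client objects
assert a.model_dump(exclude={"client"}) == b.model_dump(exclude={"client"})
```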

redisvl/extensions/llmcache/schema.py (+23 −22)
@@ -1,6 +1,6 @@
 from typing import Any, Dict, List, Optional
 
-from pydantic.v1 import BaseModel, Field, root_validator, validator
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
 from redisvl.extensions.constants import (
     CACHE_VECTOR_FIELD_NAME,
@@ -34,22 +34,23 @@ class CacheEntry(BaseModel):
     filters: Optional[Dict[str, Any]] = Field(default=None)
     """Optional filter data stored on the cache entry for customizing retrieval"""
 
-    @root_validator(pre=True)
+    @model_validator(mode="before")
     @classmethod
     def generate_id(cls, values):
         # Ensure entry_id is set
         if not values.get("entry_id"):
             values["entry_id"] = hashify(values["prompt"], values.get("filters"))
         return values
 
-    @validator("metadata")
+    @field_validator("metadata")
+    @classmethod
     def non_empty_metadata(cls, v):
         if v is not None and not isinstance(v, dict):
             raise TypeError("Metadata must be a dictionary.")
         return v
 
     def to_dict(self, dtype: str) -> Dict:
-        data = self.dict(exclude_none=True)
+        data = self.model_dump(exclude_none=True)
         data["prompt_vector"] = array_to_buffer(self.prompt_vector, dtype)
         if self.metadata is not None:
             data["metadata"] = serialize(self.metadata)
@@ -79,33 +80,33 @@ class CacheHit(BaseModel):
     filters: Optional[Dict[str, Any]] = Field(default=None)
     """Optional filter data stored on the cache entry for customizing retrieval"""
 
-    @root_validator(pre=True)
+    # Allow extra fields to simplify handling filters
+    model_config = ConfigDict(extra="allow")
+
+    @model_validator(mode="before")
     @classmethod
-    def validate_cache_hit(cls, values):
+    def validate_cache_hit(cls, values: Dict[str, Any]) -> Dict[str, Any]:
         # Deserialize metadata if necessary
         if "metadata" in values and isinstance(values["metadata"], str):
             values["metadata"] = deserialize(values["metadata"])
 
-        # Separate filters from other fields
-        known_fields = set(cls.__fields__.keys())
-        filters = {k: v for k, v in values.items() if k not in known_fields}
-
-        # Add filters to values
-        if filters:
-            values["filters"] = filters
-
-        # Remove filter fields from the main values
-        for k in filters:
-            values.pop(k)
+        # Collect any extra fields and store them as filters
+        extra_data = values.pop("__pydantic_extra__", {}) or {}
+        if extra_data:
+            current_filters = values.get("filters") or {}
+            if not isinstance(current_filters, dict):
+                current_filters = {}
+            current_filters.update(extra_data)
+            values["filters"] = current_filters
 
         return values
 
-    def to_dict(self) -> Dict:
-        data = self.dict(exclude_none=True)
-        if self.filters:
-            data.update(self.filters)
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert this model to a dictionary, merging filters into the result."""
+        data = self.model_dump(exclude_none=True)
+        if data.get("filters"):
+            data.update(data["filters"])
         del data["filters"]
-
         return data
 
 
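The `CacheHit` rework leans on Pydantic v2's `extra="allow"` config so unknown input keys survive validation and can be folded into `filters`. A simplified sketch of the same technique on a hypothetical model; it filters against `model_fields` directly instead of reading `__pydantic_extra__` as the diff does:

```python
from typing import Any, Dict, Optional

from pydantic import BaseModel, ConfigDict, model_validator


class Hit(BaseModel):
    # Hypothetical model; illustrates the pattern, not RedisVL's CacheHit.
    model_config = ConfigDict(extra="allow")  # any leftover unknown keys won't raise

    prompt: str
    filters: Optional[Dict[str, Any]] = None

    @model_validator(mode="before")
    @classmethod
    def collect_filters(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # Fold any keys that are not declared fields into `filters`.
        extras = {k: values.pop(k) for k in list(values) if k not in cls.model_fields}
        if extras:
            filters = values.get("filters") or {}
            filters.update(extras)
            values["filters"] = filters
        return values


hit = Hit(prompt="hello", user="alice", score=0.9)
print(hit.filters)  # {'user': 'alice', 'score': 0.9}
```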

redisvl/extensions/llmcache/semantic.py (+2 −2)
@@ -125,7 +125,7 @@ def __init__(
 
         # Create semantic cache schema and index
         schema = SemanticCacheIndexSchema.from_params(
-            name, prefix, vectorizer.dims, vectorizer.dtype
+            name, prefix, vectorizer.dims, vectorizer.dtype  # type: ignore
         )
         schema = self._modify_schema(schema, filterable_fields)
         self._index = SearchIndex(schema=schema)
@@ -141,7 +141,7 @@ def __init__(
             existing_index = SearchIndex.from_existing(
                 name, redis_client=self._index.client
             )
-            if existing_index.schema != self._index.schema:
+            if existing_index.schema.to_dict() != self._index.schema.to_dict():
                 raise ValueError(
                     f"Existing index {name} schema does not match the user provided schema for the semantic cache. "
                     "If you wish to overwrite the index schema, set overwrite=True during initialization."
