Skip to content

Commit 95c1f18

Browse files
committed
Add marqo vector store
1 parent e5a9190 commit 95c1f18

File tree

7 files changed

+150
-1
lines changed

7 files changed

+150
-1
lines changed

evadb/catalog/catalog_type.py

+1
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ class VectorStoreType(EvaDBEnum):
119119
CHROMADB # noqa: F821
120120
WEAVIATE # noqa: F821
121121
MILVUS # noqa: F821
122+
MARQO # noqa: F821
122123

123124

124125
class VideoColumnName(EvaDBEnum):

evadb/interfaces/relational/db.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def create_vector_index(
269269
table_name (str): Name of the table.
270270
expr (str): Expression used to build the vector index.
271271
272-
using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE` or `MILVUS`.
272+
using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE` or `MILVUS` or `MARQO`.
273273
274274
Returns:
275275
EvaDBCursor: The EvaDBCursor object.

evadb/parser/lark_visitor/_create_statements.py

+2
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,8 @@ def vector_store_type(self, tree):
304304
vector_store_type = VectorStoreType.WEAVIATE
305305
elif str.upper(token) == "MILVUS":
306306
vector_store_type = VectorStoreType.MILVUS
307+
elif str.upper(token) == "MARQO":
308+
vector_store_type = VectorStoreType.MARQO
307309
return vector_store_type
308310

309311

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
from typing import List
2+
3+
from evadb.third_party.vector_stores.types import (
4+
FeaturePayload,
5+
VectorIndexQuery,
6+
VectorIndexQueryResult,
7+
VectorStore,
8+
)
9+
from evadb.utils.generic_utils import try_to_import_marqo_client
10+
11+
# Module-level cache for the Marqo client; stays None until get_marqo_client()
# builds the client on its first call.
_marqo_client_instance = None
12+
13+
# Keyword-argument names for this store; init_vector_store() imports this list
# and hands it to validate_kwargs() before constructing MarqoVectorStore.
required_params = ["url", "index_name"]
14+
15+
16+
def get_marqo_client(url: str, api_key: str = None):
    """Return the shared Marqo client, creating it on first use.

    The client is built once and stored in the module-level
    ``_marqo_client_instance``. Later calls return that cached client and do
    not look at ``url`` or ``api_key`` again.

    Args:
        url: URL of the Marqo server, used only when the client is created.
        api_key: API key passed to the client, used only when it is created.

    Returns:
        The cached ``marqo.Client`` instance.

    Raises:
        ValueError: If the ``marqo`` package cannot be imported.
    """
    global _marqo_client_instance

    # Fast path: a client has already been built, reuse it as-is.
    if _marqo_client_instance is not None:
        return _marqo_client_instance

    # Fail with an installation hint before attempting the real import.
    try_to_import_marqo_client()
    import marqo as mq

    _marqo_client_instance = mq.Client(url=url, api_key=api_key)
    return _marqo_client_instance
23+
24+
25+
class MarqoVectorStore(VectorStore):
    """Vector store backed by a Marqo index.

    Embeddings are supplied by the caller and stored as Marqo custom vectors
    under the ``evadb_data`` document field.
    """

    def __init__(
        self, index_name: str, url: str = "http://0.0.0.0:8882", api_key=None
    ) -> None:
        """Bind the store to ``index_name`` on the Marqo server at ``url``.

        Args:
            index_name: Name of the Marqo index this store operates on.
            url: URL of the Marqo server.
            api_key: Optional API key forwarded to the Marqo client.
        """
        # api_key must be forwarded, otherwise the client is always built
        # without credentials even when the caller supplied a key.
        self._client = get_marqo_client(url=url, api_key=api_key)
        self._index_name = index_name

    def create(self, vector_dim: int):
        """Create a fresh index, dropping an existing one with the same name.

        Args:
            vector_dim: Dimensionality of the embeddings to be stored.
        """
        # Delete index if exists already
        existing_names = [
            i.index_name for i in self._client.get_indexes()["results"]
        ]
        if self._index_name in existing_names:
            self.delete()

        # create fresh
        # Refer here for details - https://docs.marqo.ai/2.0.0/API-Reference/Indexes/create_index/
        self._client.create_index(
            index_name=self._index_name,
            settings_dict={
                "index_defaults": {
                    # Vectors are provided by add()/query(), so no embedding
                    # model is configured; only the dimension is declared.
                    "model": "no_model",
                    "model_properties": {"dimensions": vector_dim},
                    "normalize_embeddings": True,
                    "ann_parameters": {"space_type": "cosinesimil"},
                }
            },
        )

    def add(self, payload: List[FeaturePayload]):
        """Index the given embeddings as custom-vector documents.

        Args:
            payload: Rows to add; each row's ``id`` must be convertible to
                ``int`` and is stored as the document ``_id`` string.
        """
        # int() validates the id; Marqo document ids are sent as strings.
        data = [
            {
                "_id": str(int(row.id)),
                "evadb_data": {"vector": row.embedding},
            }
            for row in payload
        ]

        # For reference and more information
        # check - https://docs.marqo.ai/1.4.0/Guides/Advanced-Usage/document_fields/#custom-vector-object
        self._client.index(index_name=self._index_name).add_documents(
            documents=data,
            mappings={"evadb_data": {"type": "custom_vector"}},
            tensor_fields=["evadb_data"],
            auto_refresh=True,
            client_batch_size=64,
        )

    def delete(self) -> None:
        """Delete the Marqo index this store is bound to."""
        self._client.delete_index(index_name=self._index_name)

    def query(
        self,
        query: VectorIndexQuery,
    ) -> VectorIndexQueryResult:
        """Search the index with the query embedding.

        Args:
            query: Carries the ``embedding`` to search with and ``top_k``,
                the maximum number of hits to request.

        Returns:
            A ``VectorIndexQueryResult`` holding, per hit, the document
            ``_id`` and ``1 - _score``.
        """
        response = self._client.index(self._index_name).search(
            # The embedding is passed as the only context tensor.
            context={
                "tensor": [{"vector": list(query.embedding), "weight": 1}],
            },
            limit=query.top_k,
        )

        similarities, ids = [], []

        for result in response["hits"]:
            ids.append(result["_id"])

            # Because it is similarity score
            similarities.append(1 - result["_score"])

        return VectorIndexQueryResult(similarities=similarities, ids=ids)
118+

evadb/third_party/vector_stores/utils.py

+7
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from evadb.catalog.catalog_type import VectorStoreType
1616
from evadb.third_party.vector_stores.chromadb import ChromaDBVectorStore
1717
from evadb.third_party.vector_stores.faiss import FaissVectorStore
18+
from evadb.third_party.vector_stores.marqo import MarqoVectorStore
1819
from evadb.third_party.vector_stores.milvus import MilvusVectorStore
1920
from evadb.third_party.vector_stores.pinecone import PineconeVectorStore
2021
from evadb.third_party.vector_stores.qdrant import QdrantVectorStore
@@ -67,5 +68,11 @@ def init_vector_store(
6768
validate_kwargs(kwargs, allowed_params, required_params)
6869
return MilvusVectorStore(index_name, **kwargs)
6970

71+
elif vector_store_type == VectorStoreType.MARQO:
72+
from evadb.third_party.vector_stores.marqo import required_params
73+
74+
validate_kwargs(kwargs, required_params, required_params)
75+
return MarqoVectorStore(index_name, **kwargs)
76+
7077
else:
7178
raise Exception(f"Vector store {vector_store_type} not supported")

evadb/utils/generic_utils.py

+18
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,16 @@ def try_to_import_milvus_client():
593593
)
594594

595595

596+
def try_to_import_marqo_client():
    """Check that the ``marqo`` package can be imported.

    Raises:
        ValueError: If ``marqo`` is not installed. The original
            ``ImportError`` is attached as the cause.
    """
    try:
        import marqo  # noqa: F401
    except ImportError as err:
        # Chain explicitly so the underlying import failure stays visible in
        # the traceback as the direct cause.
        raise ValueError(
            """Could not import marqo python package.
                Please install it with `pip install marqo`."""
        ) from err
604+
605+
596606
def is_qdrant_available() -> bool:
597607
try:
598608
try_to_import_qdrant_client()
@@ -633,6 +643,14 @@ def is_milvus_available() -> bool:
633643
return False
634644

635645

646+
def is_marqo_available() -> bool:
    """Report whether the ``marqo`` package can be imported.

    Returns:
        True when ``try_to_import_marqo_client()`` succeeds, False when it
        raises ``ValueError``.
    """
    try:
        try_to_import_marqo_client()
    except ValueError:
        return False
    else:
        return True
652+
653+
636654
##############################
637655
## UTILS
638656
##############################

setup.py

+3
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ def read(path, encoding="utf-8"):
116116

117117
milvus_libs = ["pymilvus>=2.3.0"]
118118

119+
marqo_libs = ["marqo"]
120+
119121

120122
postgres_libs = [
121123
"psycopg2",
@@ -177,6 +179,7 @@ def read(path, encoding="utf-8"):
177179
"chromadb": chromadb_libs,
178180
"milvus": milvus_libs,
179181
"weaviate": weaviate_libs,
182+
"marqo": marqo_libs,
180183
"postgres": postgres_libs,
181184
"ludwig": ludwig_libs,
182185
"sklearn": sklearn_libs,

0 commit comments

Comments
 (0)