Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Weaviate integration #1360

Merged
merged 17 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion evadb/catalog/catalog_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ class VectorStoreType(EvaDBEnum):
PINECONE # noqa: F821
PGVECTOR # noqa: F821
CHROMADB # noqa: F821
WEAVIATE # noqa: F821
MILVUS # noqa: F821


class VideoColumnName(EvaDBEnum):
name # noqa: F821
id # noqa: F821
Expand Down
2 changes: 2 additions & 0 deletions evadb/evadb_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,6 @@
"MILVUS_PASSWORD": "",
"MILVUS_DB_NAME": "",
"MILVUS_TOKEN": "",
"WEAVIATE_API_KEY": "",
"WEAVIATE_API_URL": "",
}
11 changes: 11 additions & 0 deletions evadb/executor/executor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,17 @@ def handle_vector_store_params(
),
"PINECONE_ENV": catalog().get_configuration_catalog_value("PINECONE_ENV"),
}
elif vector_store_type == VectorStoreType.WEAVIATE:
# Weaviate Configuration
# The Weaviate API key and URL can be obtained from the cluster details on the Weaviate Cloud Services (WCS) dashboard.
return {
"WEAVIATE_API_KEY": catalog().get_configuration_catalog_value(
"WEAVIATE_API_KEY"
),
"WEAVIATE_API_URL": catalog().get_configuration_catalog_value(
"WEAVIATE_API_URL"
),
}
elif vector_store_type == VectorStoreType.MILVUS:
return {
"MILVUS_URI": catalog().get_configuration_catalog_value("MILVUS_URI"),
Expand Down
3 changes: 2 additions & 1 deletion evadb/interfaces/relational/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ def create_vector_index(
index_name (str): Name of the index.
table_name (str): Name of the table.
expr (str): Expression used to build the vector index.
using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `MILVUS`.

using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE` or `MILVUS`.

Returns:
EvaDBCursor: The EvaDBCursor object.
Expand Down
3 changes: 2 additions & 1 deletion evadb/parser/evadb.lark
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ function_metadata_key: uid

function_metadata_value: constant

vector_store_type: USING (FAISS | QDRANT | PINECONE | PGVECTOR | CHROMADB | MILVUS)
vector_store_type: USING (FAISS | QDRANT | PINECONE | PGVECTOR | CHROMADB | WEAVIATE | MILVUS)

index_elem: ("(" uid_list ")"
| "(" function_call ")")
Expand Down Expand Up @@ -448,6 +448,7 @@ QDRANT: "QDRANT"i
PINECONE: "PINECONE"i
PGVECTOR: "PGVECTOR"i
CHROMADB: "CHROMADB"i
WEAVIATE: "WEAVIATE"i
MILVUS: "MILVUS"i

// Computer vision tasks
Expand Down
2 changes: 2 additions & 0 deletions evadb/parser/lark_visitor/_create_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,8 @@ def vector_store_type(self, tree):
vector_store_type = VectorStoreType.PGVECTOR
elif str.upper(token) == "CHROMADB":
vector_store_type = VectorStoreType.CHROMADB
elif str.upper(token) == "WEAVIATE":
vector_store_type = VectorStoreType.WEAVIATE
elif str.upper(token) == "MILVUS":
vector_store_type = VectorStoreType.MILVUS
return vector_store_type
Expand Down
73 changes: 73 additions & 0 deletions evadb/third_party/vector_stores/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,76 @@ def query(self, query: VectorIndexQuery) -> VectorIndexQueryResult:
def delete(self):
    """Delete an index.

    The body is intentionally empty at this level (it evaluates ``...``
    and returns ``None``).
    """
    ...

def create_weaviate_class(
    self, class_name: str, vectorizer: str, module_config: dict, properties: list
) -> None:
    """
    Declare a new Weaviate class from the given configuration.

    This is an interface placeholder: the body is empty and returns None.

    Args:
        class_name (str): Name of the class to create, for example "Article".
        vectorizer (str): Vectorizer module name, for example "text2vec-cohere".
        module_config (dict): Settings for the vectorizer / generative module,
            keyed by module name, for example
            {"text2vec-cohere": {"model": "embed-multilingual-v2.0"}}.
        properties (list): One dictionary per class property, for example
            [{"name": "title", "dataType": ["text"]},
             {"name": "body", "dataType": ["text"]}].

    Returns:
        None
    """
    # Placeholder body: no class is created here.
    ...

def delete_weaviate_class(self, class_name: str) -> None:
    """
    Remove a Weaviate class together with the data stored in it.

    This is an interface placeholder: the body is empty and returns None.

    Args:
        class_name (str): Name of the Weaviate class to remove.

    Returns:
        None
    """
    # Placeholder body: nothing is deleted here.
    ...

def add_to_weaviate_class(self, class_name: str, data_objects: List[dict]) -> None:
    """
    Insert objects into the named Weaviate class.

    This is an interface placeholder: the body is empty and returns None.

    Args:
        class_name (str): Name of the Weaviate class that receives the objects.
        data_objects (List[dict]): Objects to insert; each dictionary maps
            property names to their values.

    Returns:
        None
    """
    # Placeholder body: nothing is inserted here.
    ...

def query_weaviate_class(self, class_name, properties_to_retrieve, query: VectorIndexQuery) -> List[dict]:
    """
    Run a similarity search against a Weaviate class.

    This is an interface placeholder: the body is empty, so calling it
    directly returns None rather than a list.

    Args:
        class_name (str): Name of the Weaviate class to search.
        properties_to_retrieve (List[str]): Names of the properties to return.
        query (VectorIndexQuery): Similarity query carrying the query vector
            and top_k.

    Returns:
        List[dict]: One dictionary of retrieved properties per match.
    """
    # Placeholder body: no search is performed here.
    ...
8 changes: 8 additions & 0 deletions evadb/third_party/vector_stores/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from evadb.third_party.vector_stores.milvus import MilvusVectorStore
from evadb.third_party.vector_stores.pinecone import PineconeVectorStore
from evadb.third_party.vector_stores.qdrant import QdrantVectorStore
from evadb.third_party.vector_stores.weaviate import WeaviateVectorStore
from evadb.utils.generic_utils import validate_kwargs


Expand Down Expand Up @@ -51,6 +52,12 @@ def init_vector_store(
validate_kwargs(kwargs, required_params, required_params)
return ChromaDBVectorStore(index_name, **kwargs)

elif vector_store_type == VectorStoreType.WEAVIATE:
from evadb.third_party.vector_stores.weaviate import required_params

validate_kwargs(kwargs, required_params, required_params)
return WeaviateVectorStore(index_name, **kwargs)

elif vector_store_type == VectorStoreType.MILVUS:
from evadb.third_party.vector_stores.milvus import (
allowed_params,
Expand All @@ -59,5 +66,6 @@ def init_vector_store(

validate_kwargs(kwargs, allowed_params, required_params)
return MilvusVectorStore(index_name, **kwargs)

else:
raise Exception(f"Vector store {vector_store_type} not supported")
173 changes: 173 additions & 0 deletions evadb/third_party/vector_stores/weaviate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import List

from evadb.third_party.vector_stores.types import (
VectorIndexQuery,
VectorStore,
)
from evadb.utils.generic_utils import try_to_import_weaviate_client

required_params = []
_weaviate_init_done = False


class WeaviateVectorStore(VectorStore):
    """Vector store that talks to a Weaviate instance.

    Holds a ``weaviate.Client`` and exposes helpers to create, delete,
    populate and query Weaviate classes through it.
    """

    def __init__(self, index_name: str = "", **kwargs) -> None:
        """Resolve the credentials and connect to Weaviate.

        Args:
            index_name (str): Name of the index this store is built for.
                Accepted positionally so that the store can be constructed as
                ``WeaviateVectorStore(index_name, **kwargs)``; it is only
                recorded on the instance.
            **kwargs: May carry ``WEAVIATE_API_KEY`` and ``WEAVIATE_API_URL``.
                A missing or empty value falls back to the environment
                variable of the same name.

        Raises:
            ValueError: If the ``weaviate`` package cannot be imported.
            AssertionError: If the API key or the API URL cannot be resolved.
        """
        try_to_import_weaviate_client()
        global _weaviate_init_done

        self._index_name = index_name

        # Get the API key.
        self._api_key = self._resolve_setting(
            kwargs,
            "WEAVIATE_API_KEY",
            "Please set your Weaviate API key in evadb.yml file (third_party, weaviate_api_key) or "
            "environment variable (WEAVIATE_API_KEY). It can be found at the Details tab in WCS Dashboard.",
        )

        # Get the API Url.
        self._api_url = self._resolve_setting(
            kwargs,
            "WEAVIATE_API_URL",
            "Please set your Weaviate API Url in evadb.yml file (third_party, weaviate_api_url) or "
            "environment variable (WEAVIATE_API_URL). It can be found at the Details tab in WCS Dashboard.",
        )

        # Imported lazily: the dependency is optional and was verified above.
        import weaviate

        # Build the client unconditionally. It used to be created only while
        # ``_weaviate_init_done`` was False, which left ``client`` unbound (and
        # the constructor crashing) for every instance after the first one.
        client = weaviate.Client(
            url=self._api_url,
            auth_client_secret=weaviate.AuthApiKey(api_key=self._api_key),
        )

        if not _weaviate_init_done:
            # Fetch the schema once per process. The result is discarded; the
            # call presumably serves as an early connectivity check.
            client.schema.get()
            _weaviate_init_done = True

        self._client = client

    @staticmethod
    def _resolve_setting(kwargs: dict, name: str, missing_message: str) -> str:
        """Return setting ``name`` from ``kwargs``, else from the environment.

        Raises:
            AssertionError: If neither source provides a non-empty value. The
                exception type matches the ``assert`` statements this helper
                replaces, but the check is not stripped when Python runs
                with ``-O``.
        """
        value = kwargs.get(name) or os.environ.get(name)
        if not value:
            raise AssertionError(missing_message)
        return value

    def create_weaviate_class(
        self, class_name: str, vectorizer: str, module_config: dict, properties: list
    ) -> None:
        """
        Create a Weaviate class with the specified configuration.

        An existing class with the same name is deleted first, so the class is
        always re-created from scratch.

        Args:
            class_name (str): The name of the class to create, e.g., "Article".
            vectorizer (str): The vectorizer module to use, e.g., "text2vec-cohere".
            module_config (dict): Configuration for vectorizer and generative module, e.g.,
                {
                    "text2vec-cohere": {
                        "model": "embed-multilingual-v2.0",
                    },
                }
            properties (list): List of dictionaries specifying class properties, e.g.,
                [
                    {
                        "name": "title",
                        "dataType": ["text"]
                    },
                    {
                        "name": "body",
                        "dataType": ["text"]
                    },
                ]

        Returns:
            None
        """
        # In Weaviate, vector index creation and management is not done
        # explicitly as in Pinecone: a class is defined in the schema and data
        # is inserted into it.

        # Drop a previous definition so that create_class does not collide with it.
        if self._client.schema.exists(class_name):
            self._client.schema.delete_class(class_name)

        # Define the class object with the provided parameters.
        class_obj = {
            "class": class_name,
            "vectorizer": vectorizer,
            "moduleConfig": module_config,
            "properties": properties,
        }

        # Call the Weaviate API to create the class.
        self._client.schema.create_class(class_obj)

    def delete_weaviate_class(self, class_name: str) -> None:
        """
        Delete a Weaviate class and its data.

        Args:
            class_name (str): The name of the Weaviate class to delete.

        Returns:
            None
        """
        # Call the Weaviate API to delete the class.
        self._client.schema.delete_class(class_name)

    def add_to_weaviate_class(self, class_name: str, data_objects: List[dict]) -> None:
        """
        Add objects to the specified Weaviate class.

        Args:
            class_name (str): The name of the Weaviate class to add objects to.
            data_objects (List[dict]): A list of dictionaries,
                where each dictionary contains property names and values.

        Returns:
            None
        """
        # One create call is issued per object, in the order given.
        for data_object in data_objects:
            self._client.data_object.create(data_object, class_name)

    def query_weaviate_class(
        self,
        class_name: str,
        properties_to_retrieve: List[str],
        query: VectorIndexQuery,
    ) -> List[dict]:
        """
        Perform a similarity-based search in Weaviate.

        Args:
            class_name (str): The name of the Weaviate class to perform the search on.
            properties_to_retrieve (List[str]): A list of property names to retrieve.
            query (VectorIndexQuery): A query object for similarity search; its
                ``embedding`` and ``top_k`` attributes are used.

        Returns:
            List[dict]: The entries found under ``data -> Get -> class_name``
                in the response.

        Raises:
            KeyError: If the response has no ``Get`` entry or no entry for
                ``class_name``.
        """
        # Build and run the near-vector query, also requesting the distance.
        response = (
            self._client.query.get(class_name, properties_to_retrieve)
            .with_near_vector({"vector": query.embedding})
            .with_limit(query.top_k)
            .with_additional(["distance"])
            .do()
        )

        data = response.get("data", {})

        # Extract the results for the queried class.
        return data["Get"][class_name]
14 changes: 14 additions & 0 deletions evadb/utils/generic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,14 @@ def try_to_import_chromadb_client():
Please install it with 'pip install chromadb`."""
)

def try_to_import_weaviate_client():
    """Check that the ``weaviate`` package can be imported.

    Raises:
        ValueError: If importing ``weaviate`` raises ImportError; the message
            explains how to install the client.
    """
    install_hint = (
        "Could not import weaviate python package.\n"
        "Please install it with 'pip install weaviate-client`."
    )
    try:
        import weaviate  # noqa: F401
    except ImportError:
        raise ValueError(install_hint)

def try_to_import_milvus_client():
try:
Expand Down Expand Up @@ -606,6 +614,12 @@ def is_chromadb_available() -> bool:
except ValueError: # noqa: E722
return False

def is_weaviate_available() -> bool:
    """Report whether the ``weaviate`` package can be imported.

    Returns:
        bool: False when ``try_to_import_weaviate_client`` raises ValueError,
            True otherwise.
    """
    try:
        try_to_import_weaviate_client()
    except ValueError:
        return False
    return True

def is_milvus_available() -> bool:
try:
Expand Down
2 changes: 2 additions & 0 deletions script/formatting/spelling.txt
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,7 @@ WH
WIP
WMV
WeakValueDictionary
WEAVIATE
XGBoost
XdistTests
Xeon
Expand Down Expand Up @@ -1814,6 +1815,7 @@ wal
warmup
wb
weakref
weaviate
westbrae
wget
whitespaces
Expand Down
Loading