Skip to content

Commit

Permalink
feat: Add LangChain vector store adapter for CrateDB
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Feb 12, 2025
1 parent ec445ce commit f19dcd7
Show file tree
Hide file tree
Showing 11 changed files with 635 additions and 0 deletions.
24 changes: 24 additions & 0 deletions docs/docs/Components/components-vector-stores.md
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,30 @@ For more information, see the [Couchbase documentation](https://docs.couchbase.c
|----------------|------------------------|--------------------------------|
| vector_store | CouchbaseVectorStore | A Couchbase vector store instance configured with the specified parameters. |

## CrateDB

This component creates a CrateDB Vector Store with search capabilities.
For more information, see the documentation about the
[CrateDB LangChain adapter](https://cratedb.com/docs/guide/integrate/langchain/).

### Inputs

| Name | Type | Description |
|----------------------------------|---------------|------------------------------------------------------------------|
| collection_name | String | The name of the collection (table) to use. Required. |
| search_query | String | The query to search for in the vector store. |
| ingest_data | Data | The data to ingest into the vector store (list of Data objects). |
| embedding | Embeddings | The embedding function to use for the vector store. |
| server_url | String | SQLAlchemy URL to connect to CrateDB. |
| search_type | String | Type of search to perform: "Similarity" or "MMR". |
| number_of_results | Integer | Number of results to return from the search. Default: 4. |

### Outputs

| Name | Type | Description |
|----------------|--------------------|-------------------------------|
| vector_store | CrateDBVectorStore | CrateDB vector store instance |
| search_results | List[Data] | Results of similarity search |

## Elasticsearch

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ dependencies = [
"langchain-ollama==0.2.1",
"langchain-sambanova==0.1.0",
"langchain-community~=0.3.10",
"langchain-cratedb<0.2",
"sqlalchemy[aiosqlite,postgresql_psycopg2binary,postgresql_psycopgbinary]>=2.0.38,<3.0.0",
"atlassian-python-api==3.41.16",
"mem0ai==0.1.34",
Expand Down
2 changes: 2 additions & 0 deletions src/backend/base/langflow/components/vectorstores/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .chroma import ChromaVectorStoreComponent
from .clickhouse import ClickhouseVectorStoreComponent
from .couchbase import CouchbaseVectorStoreComponent
from .cratedb import CrateDBVectorStoreComponent
from .elasticsearch import ElasticsearchVectorStoreComponent
from .faiss import FaissVectorStoreComponent
from .hcd import HCDVectorStoreComponent
Expand All @@ -30,6 +31,7 @@
"ChromaVectorStoreComponent",
"ClickhouseVectorStoreComponent",
"CouchbaseVectorStoreComponent",
"CrateDBVectorStoreComponent",
"ElasticsearchVectorStoreComponent",
"FaissVectorStoreComponent",
"HCDVectorStoreComponent",
Expand Down
90 changes: 90 additions & 0 deletions src/backend/base/langflow/components/vectorstores/cratedb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import typing as t

from langchain_cratedb import CrateDBVectorStore

from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.helpers import docs_to_data
from langflow.io import HandleInput, IntInput, SecretStrInput, StrInput
from langflow.schema import Data


class CrateDBVectorStoreComponent(LCVectorStoreComponent):
    """Langflow component that exposes the LangChain ``CrateDBVectorStore``.

    The component takes a SQLAlchemy connection URL, a collection (table)
    name and an embedding handle, on top of the inputs inherited from
    ``LCVectorStoreComponent``.
    """

    display_name = "CrateDBVector"
    description = "CrateDB Vector Store with search capabilities"
    name = "CrateDB"
    icon = "CrateDB"

    inputs = [
        SecretStrInput(name="server_url", display_name="CrateDB SQLAlchemy URL", required=True),
        StrInput(name="collection_name", display_name="Table", required=True),
        *LCVectorStoreComponent.inputs,
        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"], required=True),
        IntInput(
            name="number_of_results",
            display_name="Number of Results",
            info="Number of results to return.",
            value=4,
            advanced=True,
        ),
    ]

    @check_cached_vector_store
    def build_vector_store(self) -> CrateDBVectorStore:
        """Create the vector store, ingesting ``ingest_data`` when present.

        ``Data`` items are converted with ``to_lc_document()``; any other
        item is passed through unchanged. When there is at least one
        document the store is built with ``from_documents``; otherwise it
        is obtained with ``from_existing_index``.

        Returns:
            The ``CrateDBVectorStore`` instance.
        """
        lc_documents = [
            item.to_lc_document() if isinstance(item, Data) else item
            for item in (self.ingest_data or [])
        ]

        # Fall back to the bare dialect URL when no server URL is set.
        connection_url = self.server_url or "crate://"

        shared_kwargs = {
            "embedding": self.embedding,
            "collection_name": self.collection_name,
            "connection": connection_url,
        }

        if not lc_documents:
            return CrateDBVectorStore.from_existing_index(**shared_kwargs)
        return CrateDBVectorStore.from_documents(documents=lc_documents, **shared_kwargs)

    def search_documents(self) -> list[Data]:
        """Run a similarity search for ``search_query``.

        The vector store is always built first, even when no usable query
        is given.

        Returns:
            The matching documents converted with ``docs_to_data``, or an
            empty list when the query is missing, not a string, or blank.
        """
        store = self.build_vector_store()

        query = self.search_query
        if not (query and isinstance(query, str) and query.strip()):
            return []

        matches = store.similarity_search(
            query=query,
            k=self.number_of_results,
        )
        results = docs_to_data(matches)
        self.status = results
        return results


def cratedb_collection_to_data(embedding_documents: list[t.Any]):
    """Convert a collection of CrateDB embedding records into ``Data`` objects.

    Args:
        embedding_documents: A list of records exposing ``id``, ``document``
            and ``cmetadata`` attributes (e.g. EmbeddingStore instances).

    Returns:
        list: One ``Data`` per input record, built from the record's ``id``,
        its ``document`` (stored under ``"text"``) and the entries of its
        ``cmetadata``. Metadata entries are applied last, so a metadata key
        named ``"id"`` or ``"text"`` overrides the record's own value.
    """
    data = []
    for doc in embedding_documents:
        data_dict = {
            "id": doc.id,
            "text": doc.document,
        }
        # A record without metadata may carry ``None`` here; ``dict.update(None)``
        # would raise TypeError, so treat it as "no extra fields".
        data_dict.update(doc.cmetadata or {})
        data.append(Data(**data_dict))
    return data
Empty file.
Loading

0 comments on commit f19dcd7

Please sign in to comment.