Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add LangChain vector store adapter for CrateDB #6011

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions docs/docs/Components/components-vector-stores.md
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,30 @@ For more information, see the [Couchbase documentation](https://docs.couchbase.c
|----------------|------------------------|--------------------------------|
| vector_store | CouchbaseVectorStore | A Couchbase vector store instance configured with the specified parameters. |

## CrateDB

This component creates a CrateDB Vector Store with search capabilities.
For more information, see the documentation about the
[CrateDB LangChain adapter](https://cratedb.com/docs/guide/integrate/langchain/).

### Inputs

| Name | Type | Description |
|----------------------------------|---------------|------------------------------------------------------------------|
| collection_name | String | The name of the table (collection) that stores the vectors. Required. |
| search_query | String | The query to search for in the vector store. |
| ingest_data | Data | The data to ingest into the vector store (list of Data objects). |
| embedding | Embeddings | The embedding function to use for the vector store. |
| server_url | String | SQLAlchemy URL to connect to CrateDB. |
| search_type | String | Type of search to perform: "Similarity" or "MMR". |
| number_of_results | Integer | Number of results to return from the search. Default: 4. |

### Outputs

| Name | Type | Description |
|----------------|--------------------|-------------------------------|
| vector_store | CrateDBVectorStore | CrateDB vector store instance |
| search_results | List[Data] | Results of similarity search |

## Elasticsearch

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ dependencies = [
"langchain-ollama==0.2.1",
"langchain-sambanova==0.1.0",
"langchain-community~=0.3.10",
"langchain-cratedb<0.2",
"sqlalchemy[aiosqlite,postgresql_psycopg2binary,postgresql_psycopgbinary]>=2.0.38,<3.0.0",
"atlassian-python-api==3.41.16",
"mem0ai==0.1.34",
Expand Down
2 changes: 2 additions & 0 deletions src/backend/base/langflow/components/vectorstores/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .chroma import ChromaVectorStoreComponent
from .clickhouse import ClickhouseVectorStoreComponent
from .couchbase import CouchbaseVectorStoreComponent
from .cratedb import CrateDBVectorStoreComponent
from .elasticsearch import ElasticsearchVectorStoreComponent
from .faiss import FaissVectorStoreComponent
from .hcd import HCDVectorStoreComponent
Expand All @@ -30,6 +31,7 @@
"ChromaVectorStoreComponent",
"ClickhouseVectorStoreComponent",
"CouchbaseVectorStoreComponent",
"CrateDBVectorStoreComponent",
"ElasticsearchVectorStoreComponent",
"FaissVectorStoreComponent",
"HCDVectorStoreComponent",
Expand Down
90 changes: 90 additions & 0 deletions src/backend/base/langflow/components/vectorstores/cratedb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import typing as t

from langchain_cratedb import CrateDBVectorStore

from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.helpers import docs_to_data
from langflow.io import HandleInput, IntInput, SecretStrInput, StrInput
from langflow.schema import Data


class CrateDBVectorStoreComponent(LCVectorStoreComponent):
    """Langflow component wrapping ``CrateDBVectorStore`` from ``langchain_cratedb``.

    The component takes a SQLAlchemy connection URL, a table (collection)
    name and an embedding handle, on top of whatever inputs
    ``LCVectorStoreComponent`` contributes, and offers similarity search
    over the resulting store.
    """

    display_name = "CrateDBVector"
    description = "CrateDB Vector Store with search capabilities"
    name = "CrateDB"
    icon = "CrateDB"

    inputs = [
        SecretStrInput(name="server_url", display_name="CrateDB SQLAlchemy URL", required=True),
        StrInput(name="collection_name", display_name="Table", required=True),
        # Inputs shared by all LangChain vector store components.
        *LCVectorStoreComponent.inputs,
        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"], required=True),
        IntInput(
            name="number_of_results",
            display_name="Number of Results",
            info="Number of results to return.",
            value=4,
            advanced=True,
        ),
    ]

    @check_cached_vector_store
    def build_vector_store(self) -> CrateDBVectorStore:
        """Create the CrateDB vector store, ingesting any supplied documents.

        Entries of ``ingest_data`` that are ``Data`` objects are converted to
        LangChain documents; anything else is passed through unchanged.

        Returns:
            A store built with ``from_documents`` when there is at least one
            document to ingest, otherwise one obtained through
            ``from_existing_index``.
        """
        lc_documents = [
            item.to_lc_document() if isinstance(item, Data) else item
            for item in (self.ingest_data or [])
        ]

        # An empty URL falls back to the bare "crate://" scheme.
        connection_url = self.server_url or "crate://"

        if not lc_documents:
            return CrateDBVectorStore.from_existing_index(
                embedding=self.embedding,
                collection_name=self.collection_name,
                connection=connection_url,
            )

        return CrateDBVectorStore.from_documents(
            embedding=self.embedding,
            documents=lc_documents,
            collection_name=self.collection_name,
            connection=connection_url,
        )

    def search_documents(self) -> list[Data]:
        """Run a similarity search for ``search_query`` against the store.

        The store is always built first, even when no search is performed.

        Returns:
            The matching documents converted with ``docs_to_data`` (also
            assigned to ``self.status``), or an empty list when the query is
            missing, not a string, or only whitespace.
        """
        store = self.build_vector_store()

        query = self.search_query
        if not (query and isinstance(query, str) and query.strip()):
            return []

        matches = store.similarity_search(
            query=query,
            k=self.number_of_results,
        )
        results = docs_to_data(matches)
        self.status = results
        return results


def cratedb_collection_to_data(embedding_documents: list[t.Any]):
    """Convert a collection of CrateDB vector rows into a list of ``Data``.

    Each row contributes its ``id`` and its ``document`` (exposed under the
    ``text`` key). The row's ``cmetadata`` mapping is merged on top, so a
    metadata key named ``id`` or ``text`` overrides the row's own value.

    Args:
        embedding_documents (list): Row objects exposing ``id``, ``document``
            and ``cmetadata`` attributes (presumably EmbeddingStore
            instances - confirm against the caller).

    Returns:
        list: One ``Data`` record per input row, in input order.
    """
    return [
        Data(
            **{
                "id": doc.id,
                "text": doc.document,
                # A row without metadata may carry None here; treat it as an
                # empty mapping instead of failing on the merge.
                **(doc.cmetadata or {}),
            }
        )
        for doc in embedding_documents
    ]
Empty file.
Loading
Loading