Skip to content

Commit

Permalink
fix: Improve path handling and type annotations in FaissVectorStoreCo…
Browse files Browse the repository at this point in the history
…mponent (#6081)

* 📝 (faiss.py): import Path and List modules for better type hinting and file path handling
🐛 (faiss.py): fix issue with building vector store when persist_directory is not provided
🐛 (faiss.py): fix issue with loading FAISS index when index file does not exist
📝 (faiss.py): add type hints for search_documents method parameters and return value
📝 (faiss.py): remove unnecessary logging statements from search_documents method

* [autofix.ci] apply automated fixes

* 📝 (faiss.py): add 'required' flag to the 'Persist Directory' input field to ensure it is mandatory for the user to provide a value

* 🔧 (faiss.py): refactor build_vector_store method to handle persist_directory more efficiently
🔧 (faiss.py): refactor search_documents method to handle persist_directory more efficiently

* [autofix.ci] apply automated fixes

* 🔧 (faiss.py): refactor get_persist_directory method to return resolved persist directory path or current directory if not set
♻️ (faiss.py): refactor build_vector_store and search_documents methods to use get_persist_directory method for path resolution

* ♻️ (faiss.py): refactor resolve_path method to be static and return a string instead of Path object for consistency and clarity

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
  • Loading branch information
Cristhianzl and autofix-ci[bot] authored Feb 12, 2025
1 parent d98d377 commit fda2f17
Showing 1 changed file with 34 additions and 28 deletions.
62 changes: 34 additions & 28 deletions src/backend/base/langflow/components/vectorstores/faiss.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

from langchain_community.vectorstores import FAISS

from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
Expand Down Expand Up @@ -44,16 +46,30 @@ class FaissVectorStoreComponent(LCVectorStoreComponent):
),
]

@staticmethod
def resolve_path(path: str) -> str:
    """Return ``path`` as an absolute path string.

    The conversion is delegated to ``pathlib.Path.resolve``, so a relative
    path is anchored at the process's current working directory.

    Args:
        path: The path to resolve.

    Returns:
        The resolved path, converted to ``str``.
    """
    absolute = Path(path).resolve()
    return str(absolute)

def get_persist_directory(self) -> Path:
    """Return the directory used to persist the FAISS index.

    Returns:
        ``Path()`` (the relative current directory) when
        ``self.persist_directory`` is falsy; otherwise a ``Path`` built from
        ``self.resolve_path(self.persist_directory)``.
    """
    configured = self.persist_directory
    if not configured:
        # Nothing configured: fall back to the current directory.
        return Path()
    return Path(self.resolve_path(configured))

@check_cached_vector_store
def build_vector_store(self) -> FAISS:
"""Builds the FAISS object."""
if not self.persist_directory:
msg = "Folder path is required to save the FAISS index."
raise ValueError(msg)
path = self.resolve_path(self.persist_directory)
path = self.get_persist_directory()
path.mkdir(parents=True, exist_ok=True)

documents = []

for _input in self.ingest_data or []:
if isinstance(_input, Data):
documents.append(_input.to_lc_document())
Expand All @@ -62,41 +78,31 @@ def build_vector_store(self) -> FAISS:

faiss = FAISS.from_documents(documents=documents, embedding=self.embedding)
faiss.save_local(str(path), self.index_name)

return faiss

def search_documents(self) -> list[Data]:
"""Search for documents in the FAISS vector store."""
# NOTE(review): this listing looks like a rendered diff whose +/- markers were
# lost: `path` is assigned twice and FAISS.load_local appears twice. The guard
# and unconditional load directly below are presumably the pre-change lines;
# confirm against the repository before relying on this text.
if not self.persist_directory:
msg = "Folder path is required to load the FAISS index."
raise ValueError(msg)
path = self.resolve_path(self.persist_directory)

vector_store = FAISS.load_local(
folder_path=path,
embeddings=self.embedding,
index_name=self.index_name,
allow_dangerous_deserialization=self.allow_dangerous_deserialization,
)
# Build the expected index file path: "<index_name>.faiss" inside the persist directory.
path = self.get_persist_directory()
index_path = path / f"{self.index_name}.faiss"

# No index file at that path: call build_vector_store(); otherwise load the saved index.
if not index_path.exists():
vector_store = self.build_vector_store()
else:
vector_store = FAISS.load_local(
folder_path=str(path),
embeddings=self.embedding,
index_name=self.index_name,
allow_dangerous_deserialization=self.allow_dangerous_deserialization,
)

# A falsy vector store is reported as a load failure.
if not vector_store:
msg = "Failed to load the FAISS index."
raise ValueError(msg)

self.log(f"Search input: {self.search_query}")
self.log(f"Number of results: {self.number_of_results}")

# Run the similarity search only when search_query is a non-blank string;
# number_of_results is passed as k.
if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
docs = vector_store.similarity_search(
query=self.search_query,
k=self.number_of_results,
)

self.log(f"Retrieved documents: {len(docs)}")

# NOTE(review): two result-return paths appear below (`return data` and
# `return docs_to_data(docs)`), presumably the old and new forms of the same
# step — verify which one the committed file contains.
data = docs_to_data(docs)
self.log(f"Converted documents to data: {len(data)}")
self.log(data)
return data # Return the search results data
self.log("No search input provided. Skipping search.")
return docs_to_data(docs)
# Fallback result when no usable search query was provided.
return []

0 comments on commit fda2f17

Please sign in to comment.