Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add SaveToFile component for DataFrame, Data and Message exports #6114

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
172 changes: 172 additions & 0 deletions src/backend/base/langflow/components/processing/save_to_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import json
from collections.abc import AsyncIterator, Iterator
from pathlib import Path

import pandas as pd

from langflow.custom import Component
from langflow.io import (
DataFrameInput,
DataInput,
DropdownInput,
MessageInput,
Output,
StrInput,
)
from langflow.schema import Data, DataFrame, Message


class SaveToFileComponent(Component):
    """Write a DataFrame, Data, or Message input to a file on disk.

    The ``input_type`` dropdown selects which of the three inputs is read,
    ``file_format`` selects the serialization, and ``file_path`` is used as
    given (after ``~`` expansion) as the destination. The single output is a
    human-readable confirmation string.
    """

    display_name = "Save to File"
    description = "Save DataFrames, Data, or Messages to various file formats."
    icon = "save"
    name = "SaveToFile"

    # File format options for different types
    DATA_FORMAT_CHOICES = ["csv", "excel", "json", "markdown"]
    MESSAGE_FORMAT_CHOICES = ["txt", "json", "markdown"]

    inputs = [
        DropdownInput(
            name="input_type",
            display_name="Input Type",
            options=["DataFrame", "Data", "Message"],
            info="Select the type of input to save.",
            value="DataFrame",
            real_time_refresh=True,
        ),
        # Exactly one of the three inputs below is shown at a time; the
        # visibility is toggled in update_build_config.
        DataFrameInput(
            name="df",
            display_name="DataFrame",
            info="The DataFrame to save.",
            dynamic=True,
            show=True,
        ),
        DataInput(
            name="data",
            display_name="Data",
            info="The Data object to save.",
            dynamic=True,
            show=False,
        ),
        MessageInput(
            name="message",
            display_name="Message",
            info="The Message to save.",
            dynamic=True,
            show=False,
        ),
        DropdownInput(
            name="file_format",
            display_name="File Format",
            options=DATA_FORMAT_CHOICES,
            info="Select the file format to save the input.",
            real_time_refresh=True,
        ),
        StrInput(
            name="file_path",
            display_name="File Path (including filename)",
            info="The full file path (including filename and extension).",
            value="./output",
        ),
    ]

    outputs = [
        Output(
            name="confirmation",
            display_name="Confirmation",
            method="save_to_file",
            info="Confirmation message after saving the file.",
        ),
    ]

    def update_build_config(self, build_config, field_value, field_name=None):
        """Adjust field visibility and format choices when the input type changes.

        Args:
            build_config: Mapping of field name to that field's config mapping.
            field_value: The new value of the field that changed.
            field_name: Name of the field that changed; only ``"input_type"``
                triggers any update.

        Returns:
            The same ``build_config`` object, updated in place.
        """
        # Hide/show dynamic fields based on the selected input type
        if field_name == "input_type":
            build_config["df"]["show"] = field_value == "DataFrame"
            build_config["data"]["show"] = field_value == "Data"
            build_config["message"]["show"] = field_value == "Message"

            if field_value in ["DataFrame", "Data"]:
                choices = self.DATA_FORMAT_CHOICES
            elif field_value == "Message":
                choices = self.MESSAGE_FORMAT_CHOICES
            else:
                choices = None

            if choices is not None:
                format_config = build_config["file_format"]
                # Hand out a copy so edits to the build config can never
                # mutate the class-level choice lists.
                format_config["options"] = list(choices)

                # A format picked for the previous input type may not exist
                # for the new one (e.g. "csv" for a Message); reset it so the
                # save step is not handed an unsupported format.
                selected = format_config.get("value")
                if selected and selected not in choices:
                    format_config["value"] = choices[0]

        return build_config

    def save_to_file(self) -> str:
        """Save the currently selected input to ``file_path``.

        Returns:
            A confirmation message naming the written path.

        Raises:
            ValueError: If ``input_type`` or ``file_format`` is not supported.
        """
        input_type = self.input_type
        file_format = self.file_format
        file_path = Path(self.file_path).expanduser()

        # Ensure the directory exists; exist_ok=True makes this a no-op when
        # it is already there, so no separate existence check is needed.
        file_path.parent.mkdir(parents=True, exist_ok=True)

        if input_type == "DataFrame":
            return self._save_dataframe(self.df, file_path, file_format)
        if input_type == "Data":
            return self._save_data(self.data, file_path, file_format)
        if input_type == "Message":
            return self._save_message(self.message, file_path, file_format)

        error_msg = f"Unsupported input type: {input_type}"
        raise ValueError(error_msg)

    def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:
        """Write ``dataframe`` to ``path`` as csv, excel, json, or markdown.

        Raises:
            ValueError: If ``fmt`` is not one of the supported formats.
        """
        if fmt == "csv":
            dataframe.to_csv(path, index=False)
        elif fmt == "excel":
            dataframe.to_excel(path, index=False, engine="openpyxl")
        elif fmt == "json":
            dataframe.to_json(path, orient="records", indent=2)
        elif fmt == "markdown":
            path.write_text(dataframe.to_markdown(index=False), encoding="utf-8")
        else:
            error_msg = f"Unsupported DataFrame format: {fmt}"
            raise ValueError(error_msg)

        return f"DataFrame saved successfully as '{path}'"

    @staticmethod
    def _data_to_frame(data: Data) -> pd.DataFrame:
        """Build a pandas DataFrame from the payload of a Data object.

        A non-empty dict whose values are all scalars becomes a single-row
        frame; passing such a dict straight to ``pd.DataFrame`` raises
        ``ValueError`` because pandas cannot infer an index. Every other
        payload is passed to ``pd.DataFrame`` unchanged.
        """
        payload = data.data
        if (
            isinstance(payload, dict)
            and payload
            and not any(pd.api.types.is_list_like(value) for value in payload.values())
        ):
            return pd.DataFrame([payload])
        return pd.DataFrame(payload)

    def _save_data(self, data: Data, path: Path, fmt: str) -> str:
        """Write the payload of ``data`` to ``path``.

        Tabular formats (csv, excel, markdown) go through a pandas DataFrame;
        json dumps the payload directly.

        Raises:
            ValueError: If ``fmt`` is not one of the supported formats.
        """
        if fmt == "csv":
            self._data_to_frame(data).to_csv(path, index=False)
        elif fmt == "excel":
            self._data_to_frame(data).to_excel(path, index=False, engine="openpyxl")
        elif fmt == "json":
            path.write_text(json.dumps(data.data, indent=2), encoding="utf-8")
        elif fmt == "markdown":
            path.write_text(self._data_to_frame(data).to_markdown(index=False), encoding="utf-8")
        else:
            error_msg = f"Unsupported Data format: {fmt}"
            raise ValueError(error_msg)

        return f"Data saved successfully as '{path}'"

    def _save_message(self, message: Message, path: Path, fmt: str) -> str:
        """Write the text of ``message`` to ``path`` as txt, json, or markdown.

        Raises:
            ValueError: If the message text is an async iterator, or if
                ``fmt`` is not one of the supported formats.
        """
        if message.text is None:
            content = ""
        elif isinstance(message.text, AsyncIterator):
            # An async iterator cannot be consumed from this synchronous
            # method, so it is rejected. This check must precede the Iterator
            # branch so the dedicated error is always raised.
            error_msg = "AsyncIterator not supported"
            raise ValueError(error_msg)
        elif isinstance(message.text, Iterator):
            # Convert iterator to string
            content = " ".join(str(item) for item in message.text)
        else:
            content = str(message.text)

        if fmt == "txt":
            path.write_text(content, encoding="utf-8")
        elif fmt == "json":
            path.write_text(json.dumps({"message": content}, indent=2), encoding="utf-8")
        elif fmt == "markdown":
            path.write_text(f"**Message:**\n\n{content}", encoding="utf-8")
        else:
            error_msg = f"Unsupported Message format: {fmt}"
            raise ValueError(error_msg)

        return f"Message saved successfully as '{path}'"
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import json
from pathlib import Path
from unittest.mock import MagicMock, patch

import pandas as pd
import pytest
from langflow.components.processing.save_to_file import SaveToFileComponent
from langflow.schema import Data, Message

from tests.base import ComponentTestBaseWithoutClient


class TestSaveToFileComponent(ComponentTestBaseWithoutClient):
    """Tests for SaveToFileComponent covering setup, config updates, and saving."""

    # Dotted name of the Path symbol as imported by the module under test.
    _PATH_TARGET = "langflow.components.processing.save_to_file.Path"

    @staticmethod
    def _fake_path(*, parent_exists):
        """Return a MagicMock standing in for the Path built by the component.

        ``expanduser()`` returns the mock itself, and ``parent.exists()``
        reports ``parent_exists``.
        """
        parent = MagicMock()
        parent.exists.return_value = parent_exists
        fake = MagicMock()
        fake.parent = parent
        fake.expanduser.return_value = fake
        return fake

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self):
        """Remove any output files a test may have left behind."""
        leftovers = [
            Path("./test_output.csv"),
            Path("./test_output.xlsx"),
            Path("./test_output.json"),
            Path("./test_output.md"),
            Path("./test_output.txt"),
        ]
        # Nothing to set up; the test body runs at the yield.
        yield
        # Teardown: delete whichever of the files exist.
        for leftover in leftovers:
            leftover.unlink(missing_ok=True)

    @pytest.fixture
    def component_class(self):
        """Provide the component class under test."""
        return SaveToFileComponent

    @pytest.fixture
    def default_kwargs(self):
        """Provide attributes for a DataFrame-to-CSV save."""
        rows = [{"col1": 1, "col2": "a"}, {"col1": 2, "col2": "b"}]
        return {
            "input_type": "DataFrame",
            "df": pd.DataFrame(rows),
            "file_format": "csv",
            "file_path": "./test_output.csv",
        }

    @pytest.fixture
    def file_names_mapping(self):
        """Provide the per-version file name mapping (none: new component)."""
        return []

    def test_basic_setup(self, component_class, default_kwargs):
        """Attributes passed to set_attributes are readable on the component."""
        instance = component_class()
        instance.set_attributes(default_kwargs)

        assert instance.input_type == "DataFrame"
        assert instance.file_format == "csv"
        assert instance.file_path == "./test_output.csv"

    def test_update_build_config_dataframe(self, component_class):
        """Selecting DataFrame shows only the df field and offers data formats."""
        instance = component_class()
        initial_config = {
            "df": {"show": False},
            "data": {"show": False},
            "message": {"show": False},
            "file_format": {"options": []},
        }

        result_config = instance.update_build_config(initial_config, "DataFrame", "input_type")

        assert result_config["df"]["show"] is True
        assert result_config["data"]["show"] is False
        assert result_config["message"]["show"] is False
        assert set(result_config["file_format"]["options"]) == set(instance.DATA_FORMAT_CHOICES)

    def test_save_message(self, component_class):
        """A Message is written with the expected content for each format."""
        expected_by_format = [
            ("txt", "Test message"),
            ("json", json.dumps({"message": "Test message"}, indent=2)),
            ("markdown", "**Message:**\n\nTest message"),
        ]

        for fmt, expected_content in expected_by_format:
            fake_file = self._fake_path(parent_exists=True)

            # Patch Path where the component module looks it up.
            with patch(self._PATH_TARGET, return_value=fake_file):
                instance = component_class()
                instance.set_attributes(
                    {
                        "input_type": "Message",
                        "message": Message(text="Test message"),
                        "file_format": fmt,
                        "file_path": f"./test_output.{fmt}",
                    }
                )

                outcome = instance.save_to_file()

                fake_file.write_text.assert_called_once_with(expected_content, encoding="utf-8")
                assert "saved successfully" in outcome

    def test_save_data(self, component_class):
        """A Data object saved as JSON is written as an indented dump of its payload."""
        payload = {"col1": ["value1"], "col2": ["value2"]}
        fake_file = self._fake_path(parent_exists=True)

        with patch(self._PATH_TARGET, return_value=fake_file):
            instance = component_class()
            instance.set_attributes(
                {
                    "input_type": "Data",
                    "data": Data(data=payload),
                    "file_format": "json",
                    "file_path": "./test_output.json",
                }
            )

            outcome = instance.save_to_file()

            fake_file.write_text.assert_called_once_with(json.dumps(payload, indent=2), encoding="utf-8")
            assert "saved successfully" in outcome

    def test_directory_creation(self, component_class, default_kwargs):
        """The parent directory is created when it does not exist."""
        fake_file = self._fake_path(parent_exists=False)

        with (
            patch(self._PATH_TARGET, return_value=fake_file),
            patch.object(pd.DataFrame, "to_csv") as to_csv_spy,
        ):
            instance = component_class()
            instance.set_attributes(default_kwargs)

            outcome = instance.save_to_file()

            fake_file.parent.mkdir.assert_called_once_with(parents=True, exist_ok=True)
            assert to_csv_spy.called
            assert "saved successfully" in outcome

    def test_invalid_input_type(self, default_kwargs):
        """An unknown input type makes save_to_file raise ValueError."""
        instance = SaveToFileComponent()
        # Build a new dict so the shared fixture value is not modified.
        instance.set_attributes({**default_kwargs, "input_type": "InvalidType"})

        with pytest.raises(ValueError, match="Unsupported input type"):
            instance.save_to_file()
Loading