-
Notifications
You must be signed in to change notification settings - Fork 5.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
⚡️ Speed up method AstraDBVectorStoreComponent.get_database_object by 1,269% in PR #6236 (LFOSS-492)
#6640
⚡️ Speed up method AstraDBVectorStoreComponent.get_database_object by 1,269% in PR #6236 (LFOSS-492)
#6640
Conversation
…by 1,269% in PR #6236 (`LFOSS-492`)
To optimize the provided code for better performance, we can implement a few strategies. Below is the refactored code with these performance optimizations.
### Key Changes
if not self.api_endpoint_cache: | ||
self.api_endpoint_cache = self.get_api_endpoint_static_cached( | ||
token=self.token, | ||
environment=self.environment, | ||
api_endpoint=self.api_endpoint, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if not self.api_endpoint_cache: | |
self.api_endpoint_cache = self.get_api_endpoint_static_cached( | |
token=self.token, | |
environment=self.environment, | |
api_endpoint=self.api_endpoint, | |
if self.api_endpoint_cache is None: | |
self.token, self.environment, self.api_endpoint, self.database_name |
return self.get_api_endpoint_static( | ||
token=token, | ||
environment=environment, | ||
api_endpoint=api_endpoint, | ||
database_name=database_name, | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return self.get_api_endpoint_static( | |
token=token, | |
environment=environment, | |
api_endpoint=api_endpoint, | |
database_name=database_name, | |
) | |
return self.get_api_endpoint_static(token, environment, api_endpoint, database_name) |
⚡️ Codeflash found optimizations for this PR: 📄 21% (0.21x) speedup for
|
Test | Status |
---|---|
⚙️ Existing Unit Tests | 🔘 None Found |
🌀 Generated Regression Tests | ✅ 7 Passed |
⏪ Replay Tests | 🔘 None Found |
🔎 Concolic Coverage Tests | 🔘 None Found |
📊 Tests Coverage | undefined |
🌀 Generated Regression Tests Details
import functools
from unittest.mock import patch
# imports
import pytest # used for our unit tests
# function to test
from langflow.base.vectorstores.model import LCVectorStoreComponent
from langflow.components.vectorstores.astradb import \
AstraDBVectorStoreComponent
# unit tests
@pytest.fixture
def setup_component():
    """Provide an AstraDBVectorStoreComponent built with placeholder settings.

    Returns:
        The component constructed with the default token, environment,
        API endpoint and database name shown below.
    """
    return AstraDBVectorStoreComponent(
        token="default_token",
        environment="default_environment",
        api_endpoint="default_endpoint",
        database_name="default_db",
    )
def test_different_input_combinations(setup_component):
    """Call get_api_endpoint() after overriding the four connection attributes.

    get_api_endpoint_static is patched to return "endpoint_value". The result
    is only bound to codeflash_output; this snippet makes no assertion.
    """
    # NOTE(review): the pasted snippet lost its indentation; the attribute
    # writes and the call are assumed to run inside the patch context.
    overrides = {
        "token": "tokenA",
        "environment": "envA",
        "api_endpoint": "endpointA",
        "database_name": "dbA",
    }
    with patch.object(setup_component, 'get_api_endpoint_static', return_value="endpoint_value"):
        for attr_name, attr_value in overrides.items():
            setattr(setup_component, attr_name, attr_value)
        codeflash_output = setup_component.get_api_endpoint()
def test_edge_cases(setup_component):
    """Call get_api_endpoint() with punctuation-only and 1000-character settings.

    get_api_endpoint_static is patched to return "endpoint_value". Each result
    is only bound to codeflash_output; this snippet makes no assertion.
    """
    attr_names = ("token", "environment", "api_endpoint", "database_name")
    rounds = (
        # Special characters.
        ("!@#$", "%^&*", "()_+", "{}|:"),
        # Long strings.
        ("a" * 1000, "b" * 1000, "c" * 1000, "d" * 1000),
    )
    # NOTE(review): the pasted snippet lost its indentation; both rounds are
    # assumed to run inside the patch context.
    with patch.object(setup_component, 'get_api_endpoint_static', return_value="endpoint_value"):
        for values in rounds:
            for attr_name, attr_value in zip(attr_names, values):
                setattr(setup_component, attr_name, attr_value)
            codeflash_output = setup_component.get_api_endpoint()
import functools
import threading
# imports
import pytest # used for our unit tests
from langflow.base.vectorstores.model import LCVectorStoreComponent
from langflow.components.vectorstores.astradb import \
AstraDBVectorStoreComponent
# Mocking the LCVectorStoreComponent and get_api_endpoint_static for testing
class MockLCVectorStoreComponent:
    """Stand-in base class whose endpoint lookup just echoes its arguments."""

    def __init__(self, **kwargs):
        """Accept any keyword arguments and ignore them."""

    def get_api_endpoint_static(self, token, environment, api_endpoint, database_name):
        """Return the four arguments formatted and joined with hyphens."""
        return "{}-{}-{}-{}".format(token, environment, api_endpoint, database_name)
# Swap the component's base classes for the mock above. This runs at import
# time and rebinds __bases__ on the real class object; nothing in this
# snippet restores the original bases afterwards.
AstraDBVectorStoreComponent.__bases__ = (MockLCVectorStoreComponent,)
# unit tests
# Basic Functionality
def test_missing_attributes():
    """Expect get_api_endpoint() to raise AttributeError on a component built with no arguments."""
    bare_component = AstraDBVectorStoreComponent()
    with pytest.raises(AttributeError):
        bare_component.get_api_endpoint()
# Concurrency
def test_integration_with_parent_class():
    """Construct a component with production-style settings.

    Only construction is exercised; the snippet contains no call or
    assertion after the component is built.
    """
    init_kwargs = {
        "token": "valid_token",
        "environment": "production",
        "api_endpoint": "https://api.example.com",
        "database_name": "main_db",
    }
    component = AstraDBVectorStoreComponent(**init_kwargs)
# State Verification
if self.keyspace: | ||
return self.strip_keyspace(self.keyspace) | ||
return None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if self.keyspace: | |
return self.strip_keyspace(self.keyspace) | |
return None | |
return self.strip_keyspace(self.keyspace) if self.keyspace else None |
) | ||
|
||
@staticmethod | ||
@functools.lru_cache(maxsize=128) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@functools.lru_cache(maxsize=128) | |
@functools.cache |
⚡️ Codeflash found optimizations for this PR: 📄 18% (0.18x) speedup for
|
Test | Status |
---|---|
⚙️ Existing Unit Tests | 🔘 None Found |
🌀 Generated Regression Tests | ✅ 1379 Passed |
⏪ Replay Tests | 🔘 None Found |
🔎 Concolic Coverage Tests | 🔘 None Found |
📊 Tests Coverage | undefined |
🌀 Generated Regression Tests Details
import functools
# imports
import pytest # used for our unit tests
from langflow.base.vectorstores.model import LCVectorStoreComponent
from langflow.components.vectorstores.astradb import \
AstraDBVectorStoreComponent
# unit tests
def test_basic_functionality():
    """Call get_keyspace() for keyspaces with and without surrounding whitespace.

    Each result is only bound to codeflash_output; no assertion is made here.
    """
    keyspaces = (
        "test_keyspace",    # no whitespace
        " test_keyspace ",  # leading and trailing whitespace
        " test_keyspace",   # leading whitespace only
        "test_keyspace ",   # trailing whitespace only
    )
    for keyspace in keyspaces:
        component = AstraDBVectorStoreComponent(keyspace=keyspace)
        codeflash_output = component.get_keyspace()
def test_edge_cases():
    """Call get_keyspace() for empty, None, blank and punctuation keyspaces.

    Each result is only bound to codeflash_output; no assertion is made here.
    """
    keyspaces = (
        "",                # empty string
        None,              # no keyspace
        " ",               # whitespace only
        " @#$%^&*()_+ ",   # special characters
    )
    for keyspace in keyspaces:
        component = AstraDBVectorStoreComponent(keyspace=keyspace)
        codeflash_output = component.get_keyspace()
def test_type_handling():
    """Expect AttributeError when the keyspace is an int, a list or a dict."""
    # NOTE(review): the pasted snippet lost its indentation; construction and
    # the get_keyspace() call are both assumed to sit inside the raises block.
    non_string_keyspaces = (12345, ["test_keyspace"], {"key": "value"})
    for bad_keyspace in non_string_keyspaces:
        with pytest.raises(AttributeError):
            component = AstraDBVectorStoreComponent(keyspace=bad_keyspace)
            component.get_keyspace()
def test_caching_behavior():
    """Call get_keyspace() twice on one component, then once on 129 fresh ones.

    Results are only bound to codeflash_output; no assertion is made here.
    """
    # Same component, same keyspace, two consecutive calls.
    repeated = AstraDBVectorStoreComponent(keyspace=" test_keyspace ")
    for _ in range(2):
        codeflash_output = repeated.get_keyspace()

    # 129 distinct keyspaces, one new component each.
    for name in (f"keyspace{i}" for i in range(129)):
        component = AstraDBVectorStoreComponent(keyspace=name)
        codeflash_output = component.get_keyspace()
def test_performance_and_scalability():
    """Call get_keyspace() for a 10000-character keyspace and 1000 distinct ones.

    Results are only bound to codeflash_output; no assertion is made here.
    """
    component = AstraDBVectorStoreComponent(keyspace="a" * 10000)
    codeflash_output = component.get_keyspace()

    for name in (f"keyspace{i}" for i in range(1000)):
        component = AstraDBVectorStoreComponent(keyspace=name)
        codeflash_output = component.get_keyspace()
def test_boundary_conditions():
    """Call get_keyspace() for a 1-character and a 1000-character keyspace.

    Results are only bound to codeflash_output; no assertion is made here.
    """
    for length in (1, 1000):
        component = AstraDBVectorStoreComponent(keyspace="a" * length)
        codeflash_output = component.get_keyspace()
def test_special_characters_and_unicode():
    """Call get_keyspace() for keyspaces containing CJK text, a newline and a tab.

    Results are only bound to codeflash_output; no assertion is made here.
    """
    keyspaces = (
        " 测试键空间 ",          # Unicode characters
        " test\nkeyspace ",  # embedded newline
        " test\tkeyspace ",  # embedded tab
    )
    for keyspace in keyspaces:
        component = AstraDBVectorStoreComponent(keyspace=keyspace)
        codeflash_output = component.get_keyspace()
def test_rare_edge_cases():
    """Call get_keyspace() for a series of unusual keyspace strings.

    One component is built per string, in the order listed. Results are only
    bound to codeflash_output; no assertion is made here.
    """
    keyspaces = (
        "test\0keyspace",                      # embedded null character
        " \t\n test_keyspace \n\t ",           # mixed whitespace characters
        "test\\nkeyspace",                     # literal backslash-n sequence
        "test\x1b[31mkeyspace",                # control characters
        " " * 1000 + "test_keyspace",          # very large whitespace padding
        "test\x07keyspace",                    # non-printable character
        "test\u200bkeyspace",                  # zero-width space
        "test_keyspace; DROP TABLE users;",    # SQL-injection-like pattern
        '{"key": "value"}',                    # JSON-like string
        "<div>test_keyspace</div>",            # HTML/XML-like content
        "/path/to/keyspace",                   # path-like string
        "test\u00A0keyspace",                  # no-break space
    )
    for keyspace in keyspaces:
        component = AstraDBVectorStoreComponent(keyspace=keyspace)
        codeflash_output = component.get_keyspace()
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
import functools
# imports
import pytest # used for our unit tests
from langflow.base.vectorstores.model import LCVectorStoreComponent
from langflow.components.vectorstores.astradb import \
AstraDBVectorStoreComponent
# unit tests
# Test basic functionality
def test_basic_keyspace_no_whitespace():
    """Call get_keyspace() with a keyspace that has no surrounding whitespace."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = "keyspace1"
    codeflash_output = store.get_keyspace()
def test_basic_keyspace_leading_whitespace():
    """Call get_keyspace() with a keyspace that starts with whitespace."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = " keyspace2"
    codeflash_output = store.get_keyspace()
def test_basic_keyspace_trailing_whitespace():
    """Call get_keyspace() with a keyspace that ends with whitespace."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = "keyspace3 "
    codeflash_output = store.get_keyspace()
def test_basic_keyspace_both_whitespace():
    """Call get_keyspace() with whitespace on both sides of the keyspace."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = " keyspace4 "
    codeflash_output = store.get_keyspace()
# Test edge cases
def test_edge_empty_keyspace():
    """Call get_keyspace() with the keyspace set to an empty string."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = ""
    codeflash_output = store.get_keyspace()
def test_edge_none_keyspace():
    """Call get_keyspace() with the keyspace set to None."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = None
    codeflash_output = store.get_keyspace()
def test_edge_whitespace_only_keyspace():
    """Call get_keyspace() with a keyspace that is only whitespace."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = " "
    codeflash_output = store.get_keyspace()
def test_edge_special_characters_keyspace():
    """Call get_keyspace() with a padded keyspace containing '$' and '_'."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = " key$pace_5 "
    codeflash_output = store.get_keyspace()
def test_edge_non_ascii_keyspace():
    """Call get_keyspace() with a padded Cyrillic keyspace."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = " ключевое_пространство "
    codeflash_output = store.get_keyspace()
# Test cache behavior
def test_cache_behavior_repeated_calls():
    """Call get_keyspace() twice in a row on the same component and keyspace."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = " keyspace6 "
    for _ in range(2):
        codeflash_output = store.get_keyspace()
def test_cache_behavior_different_keyspaces_same_stripped_value():
    """Call get_keyspace() for two differently padded spellings of 'keyspace7'."""
    store = AstraDBVectorStoreComponent()
    for padded in (" keyspace7 ", "keyspace7 "):
        store.keyspace = padded
        codeflash_output = store.get_keyspace()
# Test performance and scalability
def test_large_keyspace_string():
    """Call get_keyspace() with a padded keyspace of 10000 'a' characters."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = "".join((" ", "a" * 10000, " "))
    codeflash_output = store.get_keyspace()
def test_cache_limit():
    """Call get_keyspace() for 200 distinct padded keyspaces on one component."""
    # NOTE(review): indentation was lost in the pasted snippet; the assignment
    # and the call are both assumed to be inside the loop.
    store = AstraDBVectorStoreComponent()
    for index in range(200):
        store.keyspace = f" keyspace_{index} "
        codeflash_output = store.get_keyspace()
# Test invalid inputs
def test_invalid_input_integer():
    """Expect get_keyspace() to raise AttributeError when keyspace is an int."""
    store = AstraDBVectorStoreComponent()
    setattr(store, "keyspace", 12345)
    with pytest.raises(AttributeError):
        store.get_keyspace()
def test_class_keyspace_set_and_retrieve():
    """Set a padded keyspace on a fresh component and call get_keyspace()."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = " keyspace10 "
    codeflash_output = store.get_keyspace()
def test_class_keyspace_not_set():
    """Call get_keyspace() on a fresh component without assigning a keyspace."""
    untouched = AstraDBVectorStoreComponent()
    codeflash_output = untouched.get_keyspace()
# Test side effects and state management
def test_strip_keyspace_does_not_modify_original():
    """Call get_keyspace() after assigning a padded keyspace held in a local.

    The snippet discards the return value and makes no assertion about the
    stored attribute.
    """
    original_keyspace = " keyspace11 "
    store = AstraDBVectorStoreComponent()
    store.keyspace = original_keyspace
    store.get_keyspace()
# Test special cases
def test_keyspace_with_newline_characters():
    """Call get_keyspace() with a keyspace wrapped in newline characters."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = "\nkeyspace11\n"
    codeflash_output = store.get_keyspace()
def test_keyspace_with_tab_characters():
    """Call get_keyspace() with a keyspace wrapped in tab characters."""
    store = AstraDBVectorStoreComponent()
    store.keyspace = "\tkeyspace12\t"
    codeflash_output = store.get_keyspace()
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
) | ||
|
||
@staticmethod | ||
@functools.lru_cache(maxsize=128) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@functools.lru_cache(maxsize=128) | |
@functools.lru_cache(maxsize=512) # Increased cache size for better performance on frequent look-ups |
⚡️ Codeflash found optimizations for this PR: 📄 53% (0.53x) speedup for
|
Test | Status |
---|---|
⚙️ Existing Unit Tests | 🔘 None Found |
🌀 Generated Regression Tests | ✅ 555 Passed |
⏪ Replay Tests | 🔘 None Found |
🔎 Concolic Coverage Tests | 🔘 None Found |
📊 Tests Coverage | undefined |
🌀 Generated Regression Tests Details
import functools
# imports
import pytest # used for our unit tests
from langflow.base.vectorstores.model import LCVectorStoreComponent
from langflow.components.vectorstores.astradb import \
AstraDBVectorStoreComponent
# unit tests
# Basic Functionality
def test_strip_keyspace_standard_input():
    """Call strip_keyspace() with padding on both sides, the left, and the right."""
    for raw in (" keyspace ", " keyspace", "keyspace "):
        codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(raw)
# Edge Cases
def test_strip_keyspace_empty_string():
    """Call strip_keyspace() with an empty string."""
    raw = ""
    codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(raw)
def test_strip_keyspace_whitespace_only():
    """Call strip_keyspace() twice with a whitespace-only string."""
    # The two inputs are identical as they appear in the pasted snippet.
    for raw in (" ", " "):
        codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(raw)
def test_strip_keyspace_no_whitespace():
    """Call strip_keyspace() with a string that has no whitespace."""
    raw = "keyspace"
    codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(raw)
# Mixed Whitespace Characters
def test_strip_keyspace_tabs_and_newlines():
    """Call strip_keyspace() with tab, newline and mixed whitespace padding."""
    for raw in ("\tkeyspace\t", "\nkeyspace\n", "\t\n keyspace \n\t"):
        codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(raw)
# Non-String Input (should raise an error)
def test_strip_keyspace_non_string_input():
    """Expect AttributeError from strip_keyspace() for None, an int and a list."""
    # NOTE(review): other fragments on this page show an lru_cache decorator;
    # if it wraps strip_keyspace, an unhashable list argument would raise
    # TypeError before the body runs — confirm against the real method.
    for not_a_string in (None, 123, ["keyspace"]):
        with pytest.raises(AttributeError):
            AstraDBVectorStoreComponent.strip_keyspace(not_a_string)
# Unicode and Special Characters
def test_strip_keyspace_unicode_whitespace():
    """Call strip_keyspace() with em-space and ideographic-space padding."""
    padded_inputs = (
        "\u2003keyspace\u2003",  # Em space
        "\u3000keyspace\u3000",  # Ideographic space
    )
    for raw in padded_inputs:
        codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(raw)
def test_strip_keyspace_special_characters():
    """Call strip_keyspace() with a space and a tab inside the string."""
    for raw in ("key space", "key\tspace"):
        codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(raw)
# Large Scale Test Cases
def test_strip_keyspace_large_input_string():
    """Call strip_keyspace() on 'keyspace' with one million spaces on each side."""
    padding = " " * 1000000
    large_input = padding + "keyspace" + padding
    codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(large_input)
def test_strip_keyspace_large_input_with_mixed_content():
    """Call strip_keyspace() on 'keyspace' surrounded by a million ' a ' and ' b ' runs."""
    large_input = "".join((" a " * 1000000, "keyspace", " b " * 1000000))
    codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(large_input)
# Performance and Scalability
def test_strip_keyspace_repeated_calls_same_input():
    """Call strip_keyspace() 100 times with the same padded string."""
    raw = " keyspace "
    remaining = 100
    while remaining:
        codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(raw)
        remaining -= 1
def test_strip_keyspace_different_inputs_cache_size():
    """Call strip_keyspace() with 150 distinct padded strings."""
    for raw in (f" keyspace{i} " for i in range(150)):
        codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(raw)
# Deterministic Behavior
def test_strip_keyspace_consistent_results():
    """Call strip_keyspace() twice with the same padded string."""
    raw = " keyspace "
    for _ in range(2):
        codeflash_output = AstraDBVectorStoreComponent.strip_keyspace(raw)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
import functools
# imports
import pytest # used for our unit tests
from langflow.base.vectorstores.model import LCVectorStoreComponent
from langflow.components.vectorstores.astradb import \
AstraDBVectorStoreComponent
# unit tests
# Module-level instance created once at import time and shared by every test
# below; strip_keyspace is invoked through this instance rather than the class.
component = AstraDBVectorStoreComponent()
def test_basic_functionality():
    """Call strip_keyspace() with no padding and with padding on each side.

    Results are only bound to codeflash_output; no assertion is made here.
    """
    for raw in ("keyspace", " keyspace ", " keyspace", "keyspace "):
        codeflash_output = component.strip_keyspace(raw)
def test_edge_cases():
    """Call strip_keyspace() with blank strings, then expect AttributeError for non-strings."""
    for blank in ("", " "):
        codeflash_output = component.strip_keyspace(blank)

    # NOTE(review): other fragments on this page show an lru_cache decorator;
    # if it wraps strip_keyspace, an unhashable list argument would raise
    # TypeError before the body runs — confirm against the real method.
    for not_a_string in (None, 123, ["keyspace"]):
        with pytest.raises(AttributeError):
            component.strip_keyspace(not_a_string)
def test_special_characters():
    """Call strip_keyspace() with tab and newline padding in several arrangements."""
    padded_inputs = ("\tkeyspace\t", "\nkeyspace\n", "keyspace\n", "\t\nkeyspace\t\n")
    for raw in padded_inputs:
        codeflash_output = component.strip_keyspace(raw)
def test_unicode_and_multilingual_strings():
    """Call strip_keyspace() with padded Cyrillic, CJK and mixed-script strings."""
    padded_inputs = (" ключевое пространство ", " 空間 ", " keyspace空間 ")
    for raw in padded_inputs:
        codeflash_output = component.strip_keyspace(raw)
def test_performance_and_scalability():
    """Call strip_keyspace() with long outer padding and with a long inner gap."""
    outer_padding = " " * 10000
    long_inputs = (
        outer_padding + "keyspace" + outer_padding,
        "".join(("key", " " * 100000, "space")),
    )
    for raw in long_inputs:
        codeflash_output = component.strip_keyspace(raw)
def test_cache_behavior():
    """Exercise strip_keyspace() with repeated and then many distinct inputs.

    Makes 100 calls with one padded string, 150 calls with distinct strings,
    then one more call with the first string. No assertion is made here.
    """
    padded = " keyspace "

    # Repeated calls with the same input.
    for _ in range(100):
        codeflash_output = component.strip_keyspace(padded)

    # 150 unique inputs; the original comment expects this to exceed a
    # 128-entry cache so that the first entry is evicted.
    for name in map("keyspace{}".format, range(150)):
        component.strip_keyspace(name)

    codeflash_output = component.strip_keyspace(padded)
def test_whitespace_characters_other_than_space():
    """Call strip_keyspace() with strings made only of tabs, newlines and spaces."""
    for raw in ("\t\t\t", "\n\n\n", "\t \n \t"):
        codeflash_output = component.strip_keyspace(raw)
def test_strings_with_embedded_whitespace():
    """Call strip_keyspace() with whitespace both around and inside the string."""
    for raw in (" key space ", "\tkey\tspace\t", "\nkey\nspace\n"):
        codeflash_output = component.strip_keyspace(raw)
def test_strings_with_non_printable_characters():
    """Call strip_keyspace() with NUL and other low control bytes around the string."""
    wrapped_inputs = (
        "\x00keyspace\x00",
        "\x01\x02keyspace\x03\x04",
        " \x00keyspace\x00 ",
    )
    for raw in wrapped_inputs:
        codeflash_output = component.strip_keyspace(raw)
def test_strings_with_escape_sequences():
    """Call strip_keyspace() with padded strings containing literal backslash sequences."""
    for raw in (" key\\nspace ", " key\\tspace "):
        codeflash_output = component.strip_keyspace(raw)
def test_strings_with_various_encodings():
    """Call strip_keyspace() with 'keyspace' spelled via 4- and 8-digit Unicode escapes."""
    escaped_inputs = (
        " \u006B\u0065\u0079\u0073\u0070\u0061\u0063\u0065 ",
        " \U0000006B\U00000065\U00000079\U00000073\U00000070\U00000061\U00000063\U00000065 ",
    )
    for raw in escaped_inputs:
        codeflash_output = component.strip_keyspace(raw)
def test_strings_with_control_characters():
    """Call strip_keyspace() with BEL (U+0007) characters next to the string."""
    for raw in (" keyspace\u0007 ", "\u0007keyspace\u0007"):
        codeflash_output = component.strip_keyspace(raw)
# When this snippet is executed as a script, hand control to pytest.
if __name__ == "__main__":
    pytest.main()
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
⚡️ This pull request contains optimizations for PR #6236.
If you approve this dependent PR, these changes will be merged into the original PR branch `LFOSS-492`.
📄 1,269% (12.69x) speedup for `AstraDBVectorStoreComponent.get_database_object` in `src/backend/base/langflow/components/vectorstores/astradb.py`
⏱️ Runtime: 9.04 milliseconds → 660 microseconds (best of 19 runs)
📝 Explanation and details
To optimize the provided code for better performance, we can implement a few strategies.
Below is the refactored code with these performance optimizations.
Key Changes.
✅ Correctness verification report:
🌀 Generated Regression Tests Details