From 7c5ae1245e0ff7ec597ddab416e7d9735b57f57c Mon Sep 17 00:00:00 2001 From: kennethmhc Date: Tue, 9 Jul 2024 17:09:00 +0200 Subject: [PATCH 1/2] [APPEND][FSTORE-1424] Feature logging fix _apply_transformation_function in spark (#1368) --- python/hsfs/engine/spark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/hsfs/engine/spark.py b/python/hsfs/engine/spark.py index 1a9fcd3872..d69bbd40f7 100644 --- a/python/hsfs/engine/spark.py +++ b/python/hsfs/engine/spark.py @@ -1190,7 +1190,7 @@ def add_cols_to_delta_table(self, feature_group, new_features): "spark.databricks.delta.schema.autoMerge.enabled", "true" ).save(feature_group.location) - def _apply_transformation_function(self, transformation_functions, dataset): + def _apply_transformation_function(self, transformation_functions, dataset, **kwargs): # generate transformation function expressions transformed_feature_names = [] transformation_fn_expressions = [] From 7210867b0a59cba43faa45c022c13c71ab2bd121 Mon Sep 17 00:00:00 2001 From: davitbzh <44586065+davitbzh@users.noreply.github.com> Date: Tue, 9 Jul 2024 18:06:07 +0200 Subject: [PATCH 2/2] [FSTORE-1105] Query constructor for snowflake schema (#1346) * left join * snowflake * test_join --------- Co-authored-by: davitbzh Co-authored-by: Dhananjay Mukhedkar <55157590+dhananjay-mk@users.noreply.github.com> --- python/hsfs/constructor/join.py | 2 +- python/hsfs/constructor/query.py | 16 ++++++++++++++-- python/tests/constructor/test_join.py | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/python/hsfs/constructor/join.py b/python/hsfs/constructor/join.py index ad74745250..f94ddb351b 100644 --- a/python/hsfs/constructor/join.py +++ b/python/hsfs/constructor/join.py @@ -47,7 +47,7 @@ def __init__( self._on = util.parse_features(on) self._left_on = util.parse_features(left_on) self._right_on = util.parse_features(right_on) - self._join_type = join_type or self.INNER + self._join_type = join_type or self.LEFT self._prefix = prefix def to_dict(self) -> Dict[str, Any]: diff --git a/python/hsfs/constructor/query.py b/python/hsfs/constructor/query.py index d14a92ac05..aeb6557cf1 100644 --- a/python/hsfs/constructor/query.py +++ b/python/hsfs/constructor/query.py @@ -239,7 +239,7 @@ def join( on: Optional[List[str]] = None, left_on: Optional[List[str]] = None, right_on: Optional[List[str]] = None, - join_type: Optional[str] = "inner", + join_type: Optional[str] = "left", prefix: Optional[str] = None, ) -> "Query": """Join Query with another Query. @@ -769,7 +769,7 @@ def featuregroups( """List of feature groups used in the query""" featuregroups = {self._left_feature_group} for join_obj in self.joins: - featuregroups.add(join_obj.query._left_feature_group) + self._fg_rec_add(join_obj, featuregroups) return list(featuregroups) @property @@ -809,6 +809,18 @@ def get_feature(self, feature_name: str) -> "Feature": """ return self._get_feature_by_name(feature_name)[0] + def _fg_rec_add(self, join_object, featuregroups): + """ + Recursively get a feature groups from nested join and add to featuregroups list. + + # Arguments + join_object: `Join object`. + """ + if len(join_object.query.joins) > 0: + for nested_join in join_object.query.joins: + self._fg_rec_add(nested_join, featuregroups) + featuregroups.add(join_object.query._left_feature_group) + def __getattr__(self, name: str) -> Any: try: return self.__getitem__(name) diff --git a/python/tests/constructor/test_join.py b/python/tests/constructor/test_join.py index d9acaff6ac..fc6124bf43 100644 --- a/python/tests/constructor/test_join.py +++ b/python/tests/constructor/test_join.py @@ -55,7 +55,7 @@ def test_from_response_json_basic_info(self, mocker, backend_fixtures): assert len(j._on) == 0 assert len(j._left_on) == 0 assert len(j._right_on) == 0 - assert j._join_type == "INNER" + assert j._join_type == "LEFT" assert j._prefix is None def test_from_response_json_left_join(self, mocker, backend_fixtures):