-
Notifications
You must be signed in to change notification settings - Fork 112
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SNOW-1692064: Implement DataFrame/Series align for axis = 0 (#2483)
<!--- Please answer these questions before creating your pull request. Thanks! ---> 1. Which Jira issue is this PR addressing? Make sure that there is an accompanying issue to your PR. <!--- In this section, please add a Snowflake Jira issue number. Note that if a corresponding GitHub issue exists, you should still include the Snowflake Jira issue number. For example, for GitHub issue #1400, you should add "SNOW-1335071" here. ---> Fixes SNOW-1692064 2. Fill out the following pre-review checklist: - [x] I am adding a new automated test(s) to verify correctness of my new code - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing - [ ] I am adding new logging messages - [ ] I am adding a new telemetry message - [ ] I am adding new credentials - [ ] I am adding a new dependency - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes. - [x] I acknowledge that I have ensured my changes to be thread-safe. Follow the link for more information: [Thread-safe Developer Guidelines](https://docs.google.com/document/d/162d_i4zZ2AfcGRXojj0jByt8EUq-DrSHPPnTa4QvwbA/edit#bookmark=id.e82u4nekq80k) 3. Please describe how your code solves the related issue. Support for DataFrame/Series align for axis = 0 and default ``fill_value`` of np.nan. --------- Signed-off-by: Labanya Mukhopadhyay <[email protected]>
- Loading branch information
1 parent
db1b634
commit fb39324
Showing
14 changed files
with
820 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
95 changes: 95 additions & 0 deletions
95
src/snowflake/snowpark/modin/plugin/_internal/align_utils.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# | ||
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. | ||
# | ||
|
||
from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame | ||
from snowflake.snowpark.modin.plugin._internal.join_utils import align_on_index | ||
|
||
|
||
def align_axis_0_left(
    frame: InternalFrame, other_frame: InternalFrame, join: str
) -> tuple[InternalFrame, InternalFrame, list[str], list[str]]:
    """
    Compute the left-hand side of an axis-0 align of ``frame`` with ``other_frame``.

    For ``join == "right"`` the operands are swapped and ``align_on_index`` is
    called with ``how="left"``, so ``frame`` sits on the right side of the
    alignment and its data columns are looked up through the right-side
    mapping. For every other ``join`` value ``frame`` stays on the left and
    ``join`` is forwarded unchanged as ``how``.

    Args:
        frame: original frame whose aligned columns are returned
        other_frame: frame to align against
        join: type of alignment to be performed.

    Returns:
        Tuple containing:
            InternalFrame result of join_utils.align_on_index,
            the aligned result's ordered dataframe restricted to the mapped
                data columns of ``frame`` followed by the index columns,
            list of mapped data column identifiers of ``frame``,
            list of index column identifiers of the aligned result

    NOTE(review): the second tuple element is whatever
    ``ordered_dataframe.select`` returns, although the annotation says
    ``InternalFrame`` -- confirm against callers.
    """
    if join == "right":
        # Swap the operands so that a "left" align keeps every row of other_frame.
        aligned, mapper = align_on_index(other_frame, frame, how="left")
        translate_ids = mapper.map_right_quoted_identifiers
    else:
        aligned, mapper = align_on_index(frame, other_frame, how=join)
        translate_ids = mapper.map_left_quoted_identifiers

    data_ids = translate_ids(frame.data_column_snowflake_quoted_identifiers)
    index_ids = aligned.index_column_snowflake_quoted_identifiers
    selected = aligned.ordered_dataframe.select(data_ids + index_ids)
    return aligned, selected, data_ids, index_ids
|
||
|
||
def align_axis_0_right(
    frame: InternalFrame, other_frame: InternalFrame, join: str
) -> tuple[InternalFrame, InternalFrame, list[str], list[str]]:
    """
    Compute the right-hand side of an axis-0 align of ``frame`` with ``other_frame``.

    For ``join == "left"`` the alignment is ``align_on_index(frame, other_frame)``
    and the data columns of ``other_frame`` are looked up through the
    right-side mapping. For every other ``join`` value the operands are
    swapped so that ``other_frame`` is on the left; ``"right"`` is then
    expressed as ``how="left"`` on the swapped operands, and any remaining
    value is forwarded unchanged as ``how``.

    Args:
        frame: original frame
        other_frame: frame whose aligned columns are returned
        join: type of alignment to be performed.

    Returns:
        Tuple containing:
            InternalFrame result of join_utils.align_on_index,
            the aligned result's ordered dataframe restricted to the mapped
                data columns of ``other_frame`` followed by the index columns,
            list of mapped data column identifiers of ``other_frame``,
            list of index column identifiers of the aligned result

    NOTE(review): the second tuple element is whatever
    ``ordered_dataframe.select`` returns, although the annotation says
    ``InternalFrame`` -- confirm against callers.
    """
    if join == "left":
        aligned, mapper = align_on_index(frame, other_frame, how=join)
        translate_ids = mapper.map_right_quoted_identifiers
    else:
        # With other_frame on the left, a "right" align becomes a "left" one;
        # all other join types are passed through as they are.
        how = "left" if join == "right" else join
        aligned, mapper = align_on_index(other_frame, frame, how=how)
        translate_ids = mapper.map_left_quoted_identifiers

    data_ids = translate_ids(other_frame.data_column_snowflake_quoted_identifiers)
    index_ids = aligned.index_column_snowflake_quoted_identifiers
    selected = aligned.ordered_dataframe.select(data_ids + index_ids)
    return aligned, selected, data_ids, index_ids
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.