Skip to content

Commit 149e7df

Browse files
committed
Revert "SNOW-2230971: Support repr, joins, loc, reset_index, and binary ops in faster pandas (#3602)"
This reverts commit bb3d445.
1 parent 1612318 commit 149e7df

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+324
-763
lines changed

CHANGELOG.md

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,10 @@
1919

2020
#### New Features
2121

22-
- Added support for creating permanent and immutable UDFs/UDTFs with `DataFrame/Series/GroupBy.apply`, `map`, and `transform` by passing the `snowflake_udf_params` keyword argument. See documentation for details.
23-
2422
#### Improvements
25-
2623
- Hybrid execution row estimate improvements and a reduction of eager calls.
27-
- Improved performance by deferring row position computation.
28-
- The following operations are currently supported and can benefit from the optimization: `read_snowflake`, `repr`, `loc`, `reset_index`, `merge`, and binary operations.
29-
- If a lazy object (e.g., DataFrame or Series) depends on a mix of supported and unsupported operations, the optimization will not be used.
30-
- Add a new configuration variable to control transfer costs out of Snowflake when using hybrid execution. Lower the default to 100k from 10M.
24+
- Add a new configuration variable to control transfer costs out of Snowflake when using hybrid execution.
25+
- Added support for creating permanent and immutable UDFs/UDTFs with `DataFrame/Series/GroupBy.apply`, `map`, and `transform` by passing the `snowflake_udf_params` keyword argument. See documentation for details.
3126

3227
#### Bug Fixes
3328

src/snowflake/snowpark/modin/plugin/_internal/concat_utils.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -336,9 +336,7 @@ def _select_columns(
336336
)
337337

338338

339-
def add_global_ordering_columns(
340-
frame: InternalFrame, position: int, dummy_row_pos_mode: bool = False
341-
) -> InternalFrame:
339+
def add_global_ordering_columns(frame: InternalFrame, position: int) -> InternalFrame:
342340
"""
343341
To create global ordering for concat (axis=0) operation we first ensure a
344342
row position column for local ordering within the frame. Then add another
@@ -353,7 +351,7 @@ def add_global_ordering_columns(
353351
A new frame with updated ordering columns.
354352
355353
"""
356-
frame = frame.ensure_row_position_column(dummy_row_pos_mode)
354+
frame = frame.ensure_row_position_column()
357355
ordered_dataframe = frame.ordered_dataframe.sort(
358356
[OrderingColumn(frame.row_position_snowflake_quoted_identifier)]
359357
)

src/snowflake/snowpark/modin/plugin/_internal/cut_utils.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,6 @@ def compute_bin_indices(
152152
cuts_frame: InternalFrame,
153153
n_cuts: int,
154154
right: bool = True,
155-
dummy_row_pos_mode: bool = False,
156155
) -> InternalFrame:
157156
"""
158157
Given a frame of cuts, i.e. borders of bins (strictly increasing), compute for the data in values_frame the index of the bin they fall into.
@@ -184,7 +183,7 @@ def compute_bin_indices(
184183
# within OrderedDataFrame yet, we use the Snowpark layer directly. This should have no negative
185184
# consequences when it comes to building lazy graphs, as both cut and qcut are materializing operations.
186185

187-
cuts_frame = cuts_frame.ensure_row_position_column(dummy_row_pos_mode)
186+
cuts_frame = cuts_frame.ensure_row_position_column()
188187
# perform an asof join to find the closest match in the cut frame data.
189188
asof_result = join(
190189
values_frame,

src/snowflake/snowpark/modin/plugin/_internal/frame.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -888,19 +888,15 @@ def to_pandas(
888888
###########################################################################
889889
# START: Internal Frame mutation APIs.
890890
# APIs that create a new InternalFrame instance should only be added below
891-
def ensure_row_position_column(
892-
self, dummy_row_pos_mode: bool = False
893-
) -> "InternalFrame":
891+
def ensure_row_position_column(self) -> "InternalFrame":
894892
"""
895893
Ensure row position column is computed for given internal frame.
896894
897895
Returns:
898896
A new InternalFrame instance with computed virtual index.
899897
"""
900898
return InternalFrame.create(
901-
ordered_dataframe=self.ordered_dataframe.ensure_row_position_column(
902-
dummy_row_pos_mode
903-
),
899+
ordered_dataframe=self.ordered_dataframe.ensure_row_position_column(),
904900
data_column_pandas_labels=self.data_column_pandas_labels,
905901
data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers,
906902
data_column_pandas_index_names=self.data_column_pandas_index_names,
@@ -1354,9 +1350,7 @@ def select_active_columns(self) -> "InternalFrame":
13541350
)
13551351

13561352
def strip_duplicates(
1357-
self: "InternalFrame",
1358-
quoted_identifiers: list[str],
1359-
dummy_row_pos_mode: bool = False,
1353+
self: "InternalFrame", quoted_identifiers: list[str]
13601354
) -> "InternalFrame":
13611355
"""
13621356
When assigning frames via index operations, only the last entry is used for duplicates, as entries are repeatedly overwritten.
@@ -1370,7 +1364,7 @@ def strip_duplicates(
13701364
new internal frame with unique index.
13711365
"""
13721366

1373-
frame = self.ensure_row_position_column(dummy_row_pos_mode)
1367+
frame = self.ensure_row_position_column()
13741368

13751369
# To remove the duplicates, first compute via windowing over index columns the value of the last row position.
13761370
# with this join then select only the relevant rows. Note that an EXISTS subquery doesn't work here because
@@ -1406,15 +1400,12 @@ def strip_duplicates(
14061400
left_on_cols=[frame.row_position_snowflake_quoted_identifier],
14071401
right_on_cols=[relevant_last_value_row_positions_quoted_identifier],
14081402
how="inner",
1409-
dummy_row_pos_mode=dummy_row_pos_mode,
14101403
)
14111404

14121405
# Because we reuse row position to select the relevant columns, we need to
14131406
# generate a new row position column here so locational indexing after this operation
14141407
# continues to work correctly.
1415-
new_ordered_dataframe = joined_ordered_dataframe.ensure_row_position_column(
1416-
dummy_row_pos_mode
1417-
)
1408+
new_ordered_dataframe = joined_ordered_dataframe.ensure_row_position_column()
14181409
return InternalFrame.create(
14191410
ordered_dataframe=new_ordered_dataframe,
14201411
data_column_pandas_labels=frame.data_column_pandas_labels,

src/snowflake/snowpark/modin/plugin/_internal/generator_utils.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ def generate_regular_range(
7979

8080
def _create_qc_from_snowpark_dataframe(
8181
sp_df: DataFrame,
82-
dummy_row_pos_mode: bool = False,
8382
) -> "snowflake_query_compiler.SnowflakeQueryCompiler":
8483
"""
8584
Create a Snowflake query compiler from a Snowpark DataFrame, assuming the DataFrame only contains one column.
@@ -90,9 +89,7 @@ def _create_qc_from_snowpark_dataframe(
9089
Returns:
9190
A Snowflake query compiler
9291
"""
93-
odf = OrderedDataFrame(DataFrameReference(sp_df)).ensure_row_position_column(
94-
dummy_row_pos_mode
95-
)
92+
odf = OrderedDataFrame(DataFrameReference(sp_df)).ensure_row_position_column()
9693

9794
from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
9895
SnowflakeQueryCompiler,

src/snowflake/snowpark/modin/plugin/_internal/get_dummies_utils.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,6 @@ def get_dummies_helper(
184184
columns: list[Hashable],
185185
prefixes: list[Hashable],
186186
prefix_sep: str,
187-
dummy_row_pos_mode: bool = False,
188187
) -> InternalFrame:
189188
"""
190189
Helper function for get dummies to perform encoding on given columns
@@ -223,9 +222,9 @@ def get_dummies_helper(
223222
)
224223

225224
# append a lit true column as value column for pivot
226-
new_internal_frame = internal_frame.ensure_row_position_column(
227-
dummy_row_pos_mode
228-
).append_column(LIT_TRUE_COLUMN_PANDAS_LABEL, pandas_lit(True))
225+
new_internal_frame = internal_frame.ensure_row_position_column().append_column(
226+
LIT_TRUE_COLUMN_PANDAS_LABEL, pandas_lit(True)
227+
)
229228
# the dummy column is appended as the last data column of the new_internal_frame
230229
row_position_column_snowflake_quoted_identifier = (
231230
new_internal_frame.row_position_snowflake_quoted_identifier

0 commit comments

Comments
 (0)