Skip to content

Commit

Permalink
SNOW-1673230: Remove modin/pandas/utils.py (#2332)
Browse files Browse the repository at this point in the history
<!---
Please answer these questions before creating your pull request. Thanks!
--->

1. Which Jira issue is this PR addressing? Make sure that there is an
accompanying issue to your PR.

   <!---
   In this section, please add a Snowflake Jira issue number.

   Note that if a corresponding GitHub issue exists, you should still include
   the Snowflake Jira issue number. For example, for GitHub issue #1400, you
   should add "SNOW-1335071" here.
    --->

   Fixes SNOW-1673230

2. Fill out the following pre-review checklist:

   - [ ] I am adding new automated test(s) to verify correctness of my new code
      - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency
   - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes.

3. Please describe how your code solves the related issue.

This PR removes `src/snowflake/snowpark/modin/pandas/utils.py`. Some of the
functions in that file were identical to their counterparts in upstream
Modin's `modin/pandas/utils.py` and `modin/pandas/io.py`; their call sites
now import the upstream versions instead. The remaining functions, which are
specific to Snowpark pandas, have been moved to
`src/snowflake/snowpark/modin/plugin/extensions/utils.py`.
  • Loading branch information
sfc-gh-joshi authored Sep 23, 2024
1 parent a8a1577 commit 272e4e1
Show file tree
Hide file tree
Showing 30 changed files with 586 additions and 908 deletions.
840 changes: 0 additions & 840 deletions src/snowflake/snowpark/modin/pandas/utils.py

This file was deleted.

4 changes: 3 additions & 1 deletion src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,9 @@ def create_udtf_for_groupby_apply(
# Get the length of this list outside the vUDTF function because the vUDTF
# doesn't have access to the Snowpark module, which defines these types.
num_by = len(by_types)
from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native
from snowflake.snowpark.modin.plugin.extensions.utils import (
try_convert_index_to_native,
)

data_column_index = try_convert_index_to_native(data_column_index)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from enum import Enum
from typing import Any, Literal, Optional, Union

import modin.pandas as pd
import numpy as np
import pandas as native_pd
from pandas._typing import AnyArrayLike, Scalar
Expand All @@ -15,7 +16,6 @@
from pandas.core.dtypes.inference import is_integer, is_scalar
from pandas.core.indexing import IndexingError

import snowflake.snowpark.modin.pandas as pd
from snowflake.snowpark._internal.type_utils import ColumnOrName
from snowflake.snowpark.functions import (
Column,
Expand Down
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/modin/plugin/_internal/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from collections.abc import Hashable
from typing import Any, Callable, Optional, Union

import modin.pandas as pd
import numpy as np
import pandas as native_pd
from pandas._typing import FilePath

import snowflake.snowpark.modin.pandas as pd
from snowflake.snowpark.session import Session

PANDAS_KWARGS = {"names", "index_col", "usecols", "dtype"}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,12 @@
def transpose_empty_df(
original_frame: InternalFrame,
) -> "SnowflakeQueryCompiler": # type: ignore[name-defined] # noqa: F821
from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native
from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
SnowflakeQueryCompiler,
)
from snowflake.snowpark.modin.plugin.extensions.utils import (
try_convert_index_to_native,
)

return SnowflakeQueryCompiler.from_pandas(
native_pd.DataFrame(
Expand Down
8 changes: 5 additions & 3 deletions src/snowflake/snowpark/modin/plugin/_internal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
import traceback
from collections.abc import Hashable, Iterable, Sequence
from enum import Enum
from typing import Any, Callable, Optional, Union
from typing import TYPE_CHECKING, Any, Callable, Optional, Union

import modin.pandas as pd
import numpy as np
import pandas as native_pd
from pandas._typing import Scalar
from pandas.core.dtypes.common import is_integer_dtype, is_object_dtype, is_scalar

import snowflake.snowpark.modin.pandas as pd
import snowflake.snowpark.modin.plugin._internal.statement_params_constants as STATEMENT_PARAMS
from snowflake.snowpark._internal.analyzer.analyzer_utils import (
DOUBLE_QUOTE,
Expand Down Expand Up @@ -47,7 +47,6 @@
to_timestamp_tz,
typeof,
)
from snowflake.snowpark.modin.plugin._internal import frame
from snowflake.snowpark.modin.plugin._internal.ordered_dataframe import (
DataFrameReference,
OrderedDataFrame,
Expand Down Expand Up @@ -83,6 +82,9 @@
_FractionalType,
)

if TYPE_CHECKING:
from snowflake.snowpark.modin.plugin._internal import frame

ROW_POSITION_COLUMN_LABEL = "row_position"
MAX_ROW_POSITION_COLUMN_LABEL = f"MAX_{ROW_POSITION_COLUMN_LABEL}"
SAMPLED_ROW_POSITION_COLUMN_LABEL = f"sampled_{ROW_POSITION_COLUMN_LABEL}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from datetime import timedelta, tzinfo
from typing import Any, Callable, List, Literal, Optional, Union, get_args

import modin.pandas as pd
import numpy as np
import numpy.typing as npt
import pandas as native_pd
Expand Down Expand Up @@ -64,7 +65,6 @@
from pandas.io.formats.format import format_percentiles
from pandas.io.formats.printing import PrettyDict

import snowflake.snowpark.modin.pandas as pd
from snowflake.snowpark._internal.analyzer.analyzer_utils import (
quote_name_without_upper_casing,
)
Expand Down Expand Up @@ -1509,7 +1509,9 @@ def set_columns(self, new_pandas_labels: Axes) -> "SnowflakeQueryCompiler":
a new `SnowflakeQueryCompiler` with updated column labels
"""
# new_pandas_names should be able to convert into an index which is consistent to pandas df.columns behavior
from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native
from snowflake.snowpark.modin.plugin.extensions.utils import (
try_convert_index_to_native,
)

new_pandas_labels = ensure_index(try_convert_index_to_native(new_pandas_labels))
if len(new_pandas_labels) != len(self._modin_frame.data_column_pandas_labels):
Expand Down Expand Up @@ -2027,7 +2029,7 @@ def _binary_op_list_like_rhs_axis_1(
If data in both corresponding DataFrame locations is missing the result will be missing.
only arithmetic binary operation has this parameter (e.g., add() has, but eq() doesn't have).
"""
from snowflake.snowpark.modin.pandas.utils import is_scalar
from modin.pandas.utils import is_scalar

replace_mapping = {} # map: column identifier -> column expression
# Convert list-like object to list since the NaN values in the rhs are treated as invalid identifiers
Expand Down Expand Up @@ -2106,8 +2108,7 @@ def binary_op(

from modin.pandas import Series
from modin.pandas.dataframe import DataFrame

from snowflake.snowpark.modin.pandas.utils import is_scalar
from modin.pandas.utils import is_scalar

# fail explicitly for unsupported scenarios
if level is not None:
Expand Down Expand Up @@ -8210,7 +8211,7 @@ def wrapped_func(*args, **kwargs): # type: ignore[no-untyped-def] # pragma: no
self._modin_frame.data_column_snowflake_quoted_identifiers
)

from snowflake.snowpark.modin.pandas.utils import (
from snowflake.snowpark.modin.plugin.extensions.utils import (
try_convert_index_to_native,
)

Expand Down Expand Up @@ -8663,7 +8664,7 @@ def pivot_table(
# If we hit this error, that means that we have attempted a pivot on an empty
# DataFrame, so we catch the exception and return an empty DataFrame.
if e.sql_error_code == 1146:
from snowflake.snowpark.modin.pandas.utils import from_pandas
from modin.pandas.io import from_pandas

native_df = native_pd.DataFrame(index=self.index, columns=self.columns)
native_df.index.names = self.index.names
Expand Down Expand Up @@ -9955,7 +9956,7 @@ def where(
"""
# Raise not implemented error if level is specified, or other is not snowflake query compiler or
# involves more complex scalar type (not simple scalar types like int or float)
from snowflake.snowpark.modin.pandas.utils import is_scalar
from modin.pandas.utils import is_scalar

other_is_series_self_is_not = (getattr(self, "_shape_hint", None) is None) and (
getattr(other, "_shape_hint", None) == "column"
Expand Down Expand Up @@ -10583,7 +10584,7 @@ def fillna_expr(snowflake_quoted_id: str) -> SnowparkColumn:
raise ErrorMessage.not_implemented(
"Currently only can fill with dict/Series column by column"
)
from snowflake.snowpark.modin.pandas.utils import is_scalar
from modin.pandas.utils import is_scalar

# prepare label_to_value_map
if is_scalar(value):
Expand Down Expand Up @@ -14350,7 +14351,7 @@ def duplicated(

Returns
-------
Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.Series`
Snowpark pandas :class:`~modin.pandas.Series`
Boolean series for each duplicated rows.
"""
frame = self._modin_frame.ensure_row_position_column()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import pandas
from modin.pandas import Series
from modin.pandas.base import BasePandasDataset
from modin.pandas.utils import is_scalar
from pandas._libs import lib
from pandas._libs.lib import NoDefault, is_bool, no_default
from pandas._typing import (
Expand Down Expand Up @@ -64,15 +65,14 @@
register_dataframe_accessor,
register_series_accessor,
)
from snowflake.snowpark.modin.pandas.utils import (
from snowflake.snowpark.modin.plugin._typing import ListLike
from snowflake.snowpark.modin.plugin.extensions.utils import (
ensure_index,
extract_validate_and_try_convert_named_aggs_from_kwargs,
get_as_shape_compatible_dataframe_or_series,
is_scalar,
raise_if_native_pandas_objects,
validate_and_try_convert_agg_func_arg_func_to_str,
)
from snowflake.snowpark.modin.plugin._typing import ListLike
from snowflake.snowpark.modin.plugin.utils.error_message import (
ErrorMessage,
base_not_implemented,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import pandas as native_pd
from modin.pandas import DataFrame, Series
from modin.pandas.base import BasePandasDataset
from modin.pandas.io import from_non_pandas, from_pandas
from modin.pandas.utils import is_scalar
from pandas._libs.lib import NoDefault, no_default
from pandas._typing import (
AggFuncType,
Expand Down Expand Up @@ -65,15 +67,6 @@
from pandas.util._validators import validate_bool_kwarg

from snowflake.snowpark.modin.pandas.api.extensions import register_dataframe_accessor
from snowflake.snowpark.modin.pandas.utils import (
create_empty_native_pandas_frame,
from_non_pandas,
from_pandas,
is_scalar,
raise_if_native_pandas_objects,
replace_external_data_keys_with_empty_pandas_series,
replace_external_data_keys_with_query_compiler,
)
from snowflake.snowpark.modin.plugin._internal.aggregation_utils import (
is_snowflake_agg_func,
)
Expand All @@ -86,6 +79,12 @@
from snowflake.snowpark.modin.plugin.extensions.snow_partition_iterator import (
SnowparkPandasRowPartitionIterator,
)
from snowflake.snowpark.modin.plugin.extensions.utils import (
create_empty_native_pandas_frame,
raise_if_native_pandas_objects,
replace_external_data_keys_with_empty_pandas_series,
replace_external_data_keys_with_query_compiler,
)
from snowflake.snowpark.modin.plugin.utils.error_message import (
ErrorMessage,
dataframe_not_implemented,
Expand Down Expand Up @@ -459,7 +458,9 @@ def __init__(
# TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions
# Siblings are other dataframes that share the same query compiler. We
# use this list to update inplace when there is a shallow copy.
from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native
from snowflake.snowpark.modin.plugin.extensions.utils import (
try_convert_index_to_native,
)

self._siblings = []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import pandas.core.common as common
from modin.pandas import DataFrame, Series
from modin.pandas.base import BasePandasDataset
from modin.pandas.utils import is_scalar
from pandas import IntervalIndex, NaT, Timedelta, Timestamp
from pandas._libs import NaTType, lib
from pandas._libs.tslibs import to_offset
Expand Down Expand Up @@ -66,10 +67,6 @@
# add this line to make doctests runnable
from snowflake.snowpark import modin
from snowflake.snowpark.modin import pandas as pd # noqa: F401
from snowflake.snowpark.modin.pandas.utils import (
is_scalar,
raise_if_native_pandas_objects,
)
from snowflake.snowpark.modin.plugin._internal.telemetry import (
snowpark_pandas_telemetry_standalone_function_decorator,
)
Expand All @@ -80,6 +77,9 @@
from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
SnowflakeQueryCompiler,
)
from snowflake.snowpark.modin.plugin.extensions.utils import (
raise_if_native_pandas_objects,
)
from snowflake.snowpark.modin.plugin.utils.error_message import (
ErrorMessage,
pandas_module_level_function_not_implemented,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from collections.abc import Hashable
from typing import Any, Callable, Literal, Optional, Sequence, Union

import modin.pandas as pd
import numpy as np # noqa: F401
import numpy.typing as npt
import pandas
Expand All @@ -36,13 +37,6 @@
from pandas.io.formats.printing import PrettyDict
from pandas.util._validators import validate_bool_kwarg

# the following import is used in doctests
from snowflake.snowpark.modin import pandas as pd # noqa: F401
from snowflake.snowpark.modin.pandas.utils import (
extract_validate_and_try_convert_named_aggs_from_kwargs,
raise_if_native_pandas_objects,
validate_and_try_convert_agg_func_arg_func_to_str,
)
from snowflake.snowpark.modin.plugin._internal.apply_utils import (
create_groupby_transform_func,
)
Expand All @@ -51,6 +45,13 @@
from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
SnowflakeQueryCompiler,
)

# the following import is used in doctests
from snowflake.snowpark.modin.plugin.extensions.utils import (
extract_validate_and_try_convert_named_aggs_from_kwargs,
raise_if_native_pandas_objects,
validate_and_try_convert_agg_func_arg_func_to_str,
)
from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage
from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage
from snowflake.snowpark.modin.utils import (
Expand Down
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/modin/plugin/extensions/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@
)
from pandas.core.dtypes.inference import is_hashable

from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native
from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta
from snowflake.snowpark.modin.plugin._internal.timestamp_utils import DateTimeOrigin
from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
SnowflakeQueryCompiler,
)
from snowflake.snowpark.modin.plugin.extensions.utils import try_convert_index_to_native
from snowflake.snowpark.modin.plugin.utils.error_message import (
ErrorMessage,
index_not_implemented,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
from modin.pandas import Series
from modin.pandas.base import BasePandasDataset
from modin.pandas.dataframe import DataFrame
from modin.pandas.utils import is_scalar
from pandas._libs.tslibs import Resolution, parsing
from pandas._typing import AnyArrayLike, Scalar
from pandas.api.types import is_bool, is_list_like
Expand All @@ -59,8 +60,7 @@
from pandas.core.indexing import IndexingError

import snowflake.snowpark.modin.pandas as pd
import snowflake.snowpark.modin.pandas.utils as frontend_utils
from snowflake.snowpark.modin.pandas.utils import is_scalar
import snowflake.snowpark.modin.plugin.extensions.utils as frontend_utils
from snowflake.snowpark.modin.plugin._internal.indexing_utils import (
MULTIPLE_ELLIPSIS_INDEXING_ERROR_MESSAGE,
TOO_FEW_INDEXERS_INDEXING_ERROR_MESSAGE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import pandas as native_pd
from modin.pandas import DataFrame, Series
from modin.pandas.base import BasePandasDataset
from modin.pandas.io import from_pandas
from modin.pandas.utils import is_scalar
from pandas._libs.lib import NoDefault, is_integer, no_default
from pandas._typing import (
AggFuncType,
Expand All @@ -37,12 +39,8 @@

from snowflake.snowpark.modin import pandas as spd # noqa: F401
from snowflake.snowpark.modin.pandas.api.extensions import register_series_accessor
from snowflake.snowpark.modin.pandas.utils import (
from_pandas,
is_scalar,
try_convert_index_to_native,
)
from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike
from snowflake.snowpark.modin.plugin.extensions.utils import try_convert_index_to_native
from snowflake.snowpark.modin.plugin.utils.error_message import (
ErrorMessage,
series_not_implemented,
Expand Down
Loading

0 comments on commit 272e4e1

Please sign in to comment.