diff --git a/CHANGELOG.md b/CHANGELOG.md index a5e03e6324a..b70f63859f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,8 @@ - Improved generated SQL query for `head` and `iloc` when the row key is a slice. - Improved error message when passing an unknown timezone to `tz_convert` and `tz_localize` in `Series`, `DataFrame`, `Series.dt`, and `DatetimeIndex`. - Improved documentation for `tz_convert` and `tz_localize` in `Series`, `DataFrame`, `Series.dt`, and `DatetimeIndex` to specify the supported timezone formats. +- Improved generated SQL query for `iloc` and `iat` when the row key is a scalar. +- Removed all joins in `iterrows`. #### Bug Fixes diff --git a/src/snowflake/snowpark/modin/plugin/extensions/indexing_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/indexing_overrides.py index ddfa87ba5f4..5ef06b1afb1 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/indexing_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/indexing_overrides.py @@ -1192,9 +1192,10 @@ def __getitem__( if not isinstance(col_loc, pd.Series) and is_range_like(col_loc): col_loc = self._convert_range_to_valid_slice(col_loc) - # Convert all scalar, list-like, and indexer row_loc to a Series object to get a query compiler object. + # Convert scalar to slice to generate an efficient SQL query if is_scalar(row_loc): - row_loc = pd.Series([row_loc]) + row_loc = slice(row_loc, None if row_loc == -1 else row_loc + 1, 1) + # Convert list-like and indexer row_loc to a Series object to get a query compiler object. elif isinstance(row_loc, pd.Index): # Convert index row_loc to series row_loc = row_loc.to_series().reset_index(drop=True) diff --git a/tests/integ/modin/binary/test_binary_op.py b/tests/integ/modin/binary/test_binary_op.py index 80ad593698b..419a9fd7d4e 100644 --- a/tests/integ/modin/binary/test_binary_op.py +++ b/tests/integ/modin/binary/test_binary_op.py @@ -1945,7 +1945,7 @@ def test_binary_comparison_method_between_series_different_types(op): @pytest.mark.parametrize( "op", [operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le] ) -@sql_count_checker(query_count=2, join_count=5) +@sql_count_checker(query_count=2, join_count=2) def test_binary_comparison_method_between_series_variant(lhs, rhs, op): snow_ans = op(pd.Series(lhs), pd.Series(rhs)) native_ans = op(native_pd.Series(lhs), native_pd.Series(rhs)) diff --git a/tests/integ/modin/frame/test_dropna.py b/tests/integ/modin/frame/test_dropna.py index 7e8d84ea490..30e1d7fb98e 100644 --- a/tests/integ/modin/frame/test_dropna.py +++ b/tests/integ/modin/frame/test_dropna.py @@ -142,7 +142,7 @@ def test_dropna_negative(test_dropna_df): ), ], ) -@sql_count_checker(query_count=1, join_count=4, union_count=1) +@sql_count_checker(query_count=1, union_count=1) def test_dropna_iloc(df): # Check that dropna() generates a new index correctly for iloc. # 1 join for iloc, 2 joins generated by to_pandas methods during eval.
diff --git a/tests/integ/modin/frame/test_head_tail.py b/tests/integ/modin/frame/test_head_tail.py index 2cd29fb55da..3c0cef808db 100644 --- a/tests/integ/modin/frame/test_head_tail.py +++ b/tests/integ/modin/frame/test_head_tail.py @@ -71,23 +71,3 @@ def test_empty_dataframe(n, empty_snowpark_pandas_df): comparator=eval_result_and_query_with_no_join, check_column_type=False, ) - - -@pytest.mark.parametrize( - "ops", - [ - lambda df: df.head(), - lambda df: df.iloc[1:100], - lambda df: df.iloc[1000:100:-1], - ], -) -@sql_count_checker(query_count=6) -def test_head_efficient_sql(session, ops): - df = DataFrame({"a": [1] * 10000}) - with session.query_history() as query_listener: - ops(df).to_pandas() - eval_query = query_listener.queries[-2].sql_text.lower() - # check no row count - assert "count" not in eval_query - # check orderBy behinds limit - assert eval_query.index("limit") < eval_query.index("order by") diff --git a/tests/integ/modin/frame/test_iat.py b/tests/integ/modin/frame/test_iat.py index 24429291cfc..a7635a1deef 100644 --- a/tests/integ/modin/frame/test_iat.py +++ b/tests/integ/modin/frame/test_iat.py @@ -18,7 +18,7 @@ (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_iat_get_default_index_str_columns( key, default_index_snowpark_pandas_df, @@ -62,7 +62,7 @@ def iat_set_helper(df): (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_iat_get_str_index_str_columns( key, str_index_snowpark_pandas_df, @@ -103,7 +103,7 @@ def iat_set_helper(df): (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_iat_get_time_index_time_columns( key, time_index_snowpark_pandas_df, @@ -147,7 +147,7 @@ def iat_set_helper(df): (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_iat_get_multiindex_index_str_columns( key, default_index_native_df, @@ -190,7 +190,7 @@ def at_set_helper(df): (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_iat_get_default_index_multiindex_columns( key, native_df_with_multiindex_columns, @@ -231,7 +231,7 @@ def at_set_helper(df): (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_iat_get_multiindex_index_multiindex_columns( key, native_df_with_multiindex_columns, diff --git a/tests/integ/modin/frame/test_iloc.py b/tests/integ/modin/frame/test_iloc.py index 0329aa57692..471b5a69a47 100644 --- a/tests/integ/modin/frame/test_iloc.py +++ b/tests/integ/modin/frame/test_iloc.py @@ -102,9 +102,9 @@ test_negative_bound_list_input = [([-AXIS_LEN - 0.9], 1, 2)] test_int_inputs = [ - (0, 1, 4), - (AXIS_LEN - 1, 1, 4), - (-AXIS_LEN, 1, 4), + (0, 1, 0), + (AXIS_LEN - 1, 1, 0), + (-AXIS_LEN, 1, 0), ] test_inputs_on_df_for_dataframe_output = ( test_int_inputs + test_inputs_for_no_scalar_output @@ -328,7 +328,7 @@ def eval_func(df): (..., 1, 2), # leading ellipsis should be stripped ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_df_iloc_get_scalar( key, default_index_snowpark_pandas_df, default_index_native_df ): @@ -423,7 +423,7 @@ def test_df_iloc_get_empty_key( ) -@sql_count_checker(query_count=2, join_count=2) +@sql_count_checker(query_count=2) def test_df_iloc_get_empty(empty_snowpark_pandas_df): _ = empty_snowpark_pandas_df.iloc[0] @@ -1088,22 +1088,21 @@ def iloc_helper(df): else: return 
native_pd.Series([]) if axis == "row" else df.iloc[:, []] - def determine_query_and_join_count(): + def determine_query_count(): # Multiple queries because of squeeze() - in range is 2, out-of-bounds is 1. if axis == "col": - num_joins = 0 num_queries = 1 else: if not -8 < key < 7: # key is out of bound - num_queries, num_joins = 2, 8 + num_queries = 2 else: - num_queries, num_joins = 1, 4 - return num_queries, num_joins + num_queries = 1 + return num_queries - query_count, join_count = determine_query_and_join_count() + query_count = determine_query_count() # test df with default index num_cols = 7 - with SqlCounter(query_count=query_count, join_count=join_count): + with SqlCounter(query_count=query_count): eval_snowpark_pandas_result( default_index_snowpark_pandas_df, default_index_native_df, @@ -1112,20 +1111,20 @@ def determine_query_and_join_count(): # test df with non-default index num_cols = 6 # set_index() makes the number of columns 6 - with SqlCounter(query_count=query_count, join_count=join_count): + with SqlCounter(query_count=query_count): eval_snowpark_pandas_result( default_index_snowpark_pandas_df.set_index("D"), default_index_native_df.set_index("D"), iloc_helper, ) - query_count, join_count = determine_query_and_join_count() + query_count = determine_query_count() # test df with MultiIndex # Index dtype is different between Snowpark and native pandas if key produces empty df. num_cols = 7 native_df = default_index_native_df.set_index(multiindex_native) snowpark_df = pd.DataFrame(native_df) - with SqlCounter(query_count=query_count, join_count=join_count): + with SqlCounter(query_count=query_count): eval_snowpark_pandas_result( snowpark_df, native_df, @@ -1138,7 +1137,7 @@ def determine_query_and_join_count(): native_df_with_multiindex_columns ) in_range = True if (-8 < key < 7) else False - with SqlCounter(query_count=query_count, join_count=join_count): + with SqlCounter(query_count=query_count): if axis == "row" or in_range: # series result eval_snowpark_pandas_result( snowpark_df_with_multiindex_columns, @@ -1158,7 +1157,7 @@ def determine_query_and_join_count(): # test df with MultiIndex on both index and columns native_df = native_df_with_multiindex_columns.set_index(multiindex_native) snowpark_df = pd.DataFrame(native_df) - with SqlCounter(query_count=query_count, join_count=join_count): + with SqlCounter(query_count=query_count): if axis == "row" or in_range: # series result eval_snowpark_pandas_result( snowpark_df, @@ -2906,10 +2905,12 @@ def iloc_helper(df): def determine_query_and_join_count(): # Initialize count values; query_count = row_count + col_count. query_count = 1 # base query count - # All scalar and list-like row keys are treated like Series keys; a join is performed between the df and + # All list-like row keys are treated like Series keys; a join is performed between the df and # key. For slice and range keys, a filter is used on the df instead. 
join_count = 2 - if not isinstance(row, list) or len(row) > 0: + if is_scalar(row): + join_count = 0 + elif not isinstance(row, list) or len(row) > 0: if is_range_like(row) or isinstance(row, slice): join_count = 0 elif all(isinstance(i, bool) or isinstance(i, np.bool_) for i in row): @@ -2934,7 +2935,7 @@ def determine_query_and_join_count(): (1, native_pd.Series([False, False, False, False, False, True, True])), ], ) -@sql_count_checker(query_count=2, join_count=4, union_count=1) +@sql_count_checker(query_count=2, union_count=1) def test_df_iloc_get_array_col( row, col, @@ -3124,7 +3125,7 @@ def iloc_helper(df): col_in_range = True if col_lower_bound < col < col_upper_bound else False if row_in_range and col_in_range: # scalar value is returned - with SqlCounter(query_count=1, join_count=2): + with SqlCounter(query_count=1): snowpark_res = ( snowpark_df.iloc[(row, col)] if is_tuple @@ -3137,7 +3138,7 @@ def iloc_helper(df): for idx, val in enumerate(snowpark_res): assert val == native_res[idx] else: - with SqlCounter(query_count=1, join_count=2): + with SqlCounter(query_count=1): with pytest.raises(IndexError): iloc_helper(native_df) assert len(iloc_helper(snowpark_df)) == 0 @@ -3252,3 +3253,46 @@ def ilocset(df): native_df, ilocset, ) + + +@pytest.mark.parametrize( + "ops", + [ + lambda df: df.head(), + lambda df: df.iloc[1:100], + lambda df: df.iloc[1000:100:-1], + ], +) +@sql_count_checker(query_count=6) +def test_df_iloc_efficient_sql(session, ops): + df = DataFrame({"a": [1] * 10000}) + with session.query_history() as query_listener: + ops(df).to_pandas() + eval_query = query_listener.queries[ + -2 + ].sql_text.lower() # query before drop temp table + # check no row count is in the sql query + assert "count" not in eval_query + # check orderBy is after limit in the sql query + assert eval_query.index("limit") < eval_query.index("order by") + + +@pytest.mark.parametrize( + "ops", + [ + lambda df: df.iloc[0], + lambda df: df.iloc[100], + ], +) +@sql_count_checker(query_count=8, union_count=1) +def test_df_iloc_scalar_efficient_sql(session, ops): + df = DataFrame({"a": [1] * 10000}) + with session.query_history() as query_listener: + ops(df).to_pandas() + eval_query = query_listener.queries[ + -3 + ].sql_text.lower() # query before drop temp table and transpose + # check no row count is in the sql query + assert "count" not in eval_query + # check limit is used in the sql query + assert "limit" in eval_query diff --git a/tests/integ/modin/frame/test_iterrows.py b/tests/integ/modin/frame/test_iterrows.py index 29a5ca9b0f4..940a9144d3f 100644 --- a/tests/integ/modin/frame/test_iterrows.py +++ b/tests/integ/modin/frame/test_iterrows.py @@ -61,7 +61,7 @@ def test_df_iterrows(native_df): snowpark_df = pd.DataFrame(native_df) # One query is used to get the number of rows. One query is used to retrieve each row - each query has 4 JOIN # operations performed due to iloc. - with SqlCounter(query_count=len(native_df) + 1, join_count=4 * len(native_df)): + with SqlCounter(query_count=len(native_df) + 1): eval_snowpark_pandas_result( snowpark_df, native_df, @@ -70,7 +70,7 @@ def test_df_iterrows(native_df): ) -@sql_count_checker(query_count=8, join_count=28, union_count=7) +@sql_count_checker(query_count=8, union_count=7) def test_df_iterrows_mixed_types(default_index_native_df): # Same test as above on bigger df with mixed types. # One query is used to get the number of rows. 
One query is used to retrieve each row - each query has 4 JOIN @@ -85,7 +85,7 @@ def test_df_iterrows_mixed_types(default_index_native_df): ) -@sql_count_checker(query_count=7, join_count=24, union_count=6) +@sql_count_checker(query_count=7, union_count=6) def test_df_iterrows_multindex_df(): # Create df with a MultiIndex index. # One query is used to get the number of rows. One query is used to retrieve each row - each query has 4 JOIN diff --git a/tests/integ/modin/frame/test_nunique.py b/tests/integ/modin/frame/test_nunique.py index aa280d72af5..cf2737e10c1 100644 --- a/tests/integ/modin/frame/test_nunique.py +++ b/tests/integ/modin/frame/test_nunique.py @@ -31,10 +31,6 @@ @pytest.mark.parametrize("axes_slices", TEST_SLICES) @pytest.mark.parametrize("dropna", [True, False]) def test_dataframe_nunique(axes_slices, dropna): - expected_join_count = 0 - if axes_slices == (0, slice(None)): - expected_join_count = 4 - df = pd.DataFrame( pd.DataFrame(TEST_DATA, columns=TEST_LABELS).iloc[ axes_slices[0], axes_slices[1] @@ -46,7 +42,7 @@ def test_dataframe_nunique(axes_slices, dropna): ] ) - with SqlCounter(query_count=1, join_count=expected_join_count): + with SqlCounter(query_count=1): eval_snowpark_pandas_result( df, native_df, diff --git a/tests/integ/modin/index/test_indexing.py b/tests/integ/modin/index/test_indexing.py index a6ff4c4d5b9..3ddfcfc8c32 100644 --- a/tests/integ/modin/index/test_indexing.py +++ b/tests/integ/modin/index/test_indexing.py @@ -7,6 +7,7 @@ import numpy as np import pandas as native_pd import pytest +from modin.pandas.utils import is_scalar import snowflake.snowpark.modin.plugin # noqa: F401 from tests.integ.modin.utils import assert_index_equal @@ -31,7 +32,7 @@ ], ) def test_index_indexing(index, key): - if isinstance(key, slice) or key is ...: + if isinstance(key, slice) or key is ... or is_scalar(key): join_count = 0 # because slice key uses filter not join elif isinstance(key, list) and isinstance(key[0], bool): join_count = 1 # because need to join key diff --git a/tests/integ/modin/series/test_head_tail.py b/tests/integ/modin/series/test_head_tail.py index 38f75463cde..651ba43e4e2 100644 --- a/tests/integ/modin/series/test_head_tail.py +++ b/tests/integ/modin/series/test_head_tail.py @@ -1,7 +1,6 @@ # # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
# -import modin.pandas as pd import pytest from tests.integ.modin.frame.test_head_tail import eval_result_and_query_with_no_join @@ -51,23 +50,3 @@ def test_empty_dataframe(n, empty_snowpark_pandas_series, empty_pandas_series): lambda df: (df.tail() if n is None else df.tail(n)), comparator=eval_result_and_query_with_no_join, ) - - -@pytest.mark.parametrize( - "ops", - [ - lambda df: df.head(), - lambda df: df.iloc[1:100], - lambda df: df.iloc[1000:100:-1], - ], -) -@sql_count_checker(query_count=1) -def test_head_efficient_sql(session, ops): - df = pd.Series({"a": [1] * 10000}) - with session.query_history() as query_listener: - ops(df).to_pandas() - eval_query = query_listener.queries[-1].sql_text.lower() - # check no row count - assert "count" not in eval_query - # check orderBy behinds limit - assert eval_query.index("limit") < eval_query.index("order by") diff --git a/tests/integ/modin/series/test_iat.py b/tests/integ/modin/series/test_iat.py index 756c94113a4..2ffc2db90d8 100644 --- a/tests/integ/modin/series/test_iat.py +++ b/tests/integ/modin/series/test_iat.py @@ -16,7 +16,7 @@ (0,), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_iat_get_default_index( key, default_index_snowpark_pandas_series, @@ -61,7 +61,7 @@ def iat_set_helper(series): (0,), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_iat_get_str_index( key, str_index_snowpark_pandas_series, @@ -103,7 +103,7 @@ def iat_set_helper(series): (0,), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_iat_get_time_index( key, time_index_snowpark_pandas_series, @@ -147,7 +147,7 @@ def iat_set_helper(series): (0,), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_iat_get_multiindex( key, multiindex_native_int_series, diff --git a/tests/integ/modin/series/test_iloc.py b/tests/integ/modin/series/test_iloc.py index cd994733f52..7b7568bbc6b 100644 --- a/tests/integ/modin/series/test_iloc.py +++ b/tests/integ/modin/series/test_iloc.py @@ -66,7 +66,7 @@ def test_series_iloc_snowpark_pandas_input_return_dataframe( ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_diff2native(default_index_snowpark_pandas_series, default_index_native_series): assert ( default_index_snowpark_pandas_series.iloc[..., 3] @@ -338,7 +338,7 @@ def iloc_helper(ser): # One join is performed for each query. 
# test ser with default index - with SqlCounter(query_count=1, join_count=2): + with SqlCounter(query_count=1): snowpark_res = iloc_helper(default_index_int_series) native_res = iloc_helper(default_index_native_int_series) assert snowpark_res == native_res @@ -348,14 +348,14 @@ def iloc_helper(ser): int_series_with_non_default_index = pd.Series( native_int_series_with_non_default_index ) - with SqlCounter(query_count=1, join_count=2): + with SqlCounter(query_count=1): snowpark_res = iloc_helper(int_series_with_non_default_index) native_res = iloc_helper(native_int_series_with_non_default_index) assert snowpark_res == native_res # test ser with MultiIndex int_series_with_multiindex = pd.Series(multiindex_native_int_series) - with SqlCounter(query_count=1, join_count=2): + with SqlCounter(query_count=1): snowpark_res = iloc_helper(int_series_with_multiindex) native_res = iloc_helper(multiindex_native_int_series) assert snowpark_res == native_res @@ -527,7 +527,7 @@ def test_series_iloc_get_key_raises_not_implemented_error_negative( _ = snowpark_index_int_series.iloc[key] -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=1) def test_series_iloc_get_empty(empty_snowpark_pandas_series): _ = empty_snowpark_pandas_series.iloc[0] @@ -958,3 +958,27 @@ def helper(series): series.iloc[0] = [4, 5, 6] eval_snowpark_pandas_result(series, native_s, helper, inplace=True) + + +@pytest.mark.parametrize( + "ops", + [ + lambda df: df.head(), + lambda df: df.iloc[1:100], + lambda df: df.iloc[1000:100:-1], + lambda df: df.iloc[0], + lambda df: df.iloc[100], + ], +) +@sql_count_checker(query_count=1) def test_iloc_efficient_sql(session, ops): + df = pd.Series({"a": [1] * 10000}) + with session.query_history() as query_listener: + res = ops(df) + if isinstance(res, pd.Series): + res._to_pandas() + eval_query = query_listener.queries[-1].sql_text.lower() + # check no row count is in the sql query + assert "count" not in eval_query + # check orderBy is after limit in the sql query + assert eval_query.index("limit") < eval_query.index("order by") diff --git a/tests/integ/modin/series/test_mask.py b/tests/integ/modin/series/test_mask.py index b65ca056264..b9dfad35415 100644 --- a/tests/integ/modin/series/test_mask.py +++ b/tests/integ/modin/series/test_mask.py @@ -196,7 +196,7 @@ def test_series_mask_with_lambda_returns_singleton_should_fail(): @pytest.mark.parametrize( "other, sql_count, join_count", - [(lambda x: -x.iloc[0], 4, 10), (lambda x: x**2, 3, 8)], + [(lambda x: -x.iloc[0], 4, 7), (lambda x: x**2, 3, 8)], ) def test_series_mask_with_lambda_other(other, sql_count, join_count): # Multiple joins since multiple Series are created with non-Snowpark pandas data diff --git a/tests/integ/modin/series/test_where.py b/tests/integ/modin/series/test_where.py index 141f155440f..f5d2b2eb27d 100644 --- a/tests/integ/modin/series/test_where.py +++ b/tests/integ/modin/series/test_where.py @@ -196,7 +196,7 @@ def test_series_where_with_lambda_cond_returns_singleton_should_fail(): @pytest.mark.parametrize( "other, sql_count, join_count", - [(lambda x: -x.iloc[0], 4, 10), (lambda x: x**2, 3, 8)], + [(lambda x: -x.iloc[0], 4, 7), (lambda x: x**2, 3, 8)], ) def test_series_where_with_lambda_other(other, sql_count, join_count): # High join count due to creatinga Series with non-Snowpark pandas data diff --git a/tests/integ/modin/test_telemetry.py b/tests/integ/modin/test_telemetry.py index e1cc1f2f3eb..5a5f90db50e 100644 --- a/tests/integ/modin/test_telemetry.py +++
b/tests/integ/modin/test_telemetry.py @@ -485,7 +485,7 @@ def test_telemetry_property_index(): @pytest.mark.parametrize( "name, method, expected_query_count, expected_join_count", [ - ["iloc", lambda df: df.iloc[0, 0], 1, 2], + ["iloc", lambda df: df.iloc[0, 0], 1, 0], ["loc", lambda df: df.loc[0, "a"], 2, 2], ], ) diff --git a/tests/integ/modin/tools/test_to_datetime.py b/tests/integ/modin/tools/test_to_datetime.py index 964c45b1dea..cc70a59705c 100644 --- a/tests/integ/modin/tools/test_to_datetime.py +++ b/tests/integ/modin/tools/test_to_datetime.py @@ -597,7 +597,7 @@ def test_to_datetime_pydatetime(self): @pytest.mark.parametrize( "dt", [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")] ) - @sql_count_checker(query_count=1, join_count=2) + @sql_count_checker(query_count=1) def test_to_datetime_dt64s(self, cache, dt): assert to_datetime(pd.Index([dt]), cache=cache)[0] == Timestamp(dt) @@ -838,7 +838,7 @@ def test_to_datetime_df_negative(self): {"arg": 1490195805433502912, "unit": "ns"}, ], ) - @sql_count_checker(query_count=1, join_count=2) + @sql_count_checker(query_count=1) def test_to_datetime_unit(self, sample): assert pd.to_datetime(pd.Index([sample["arg"]]), unit=sample["unit"])[ 0 diff --git a/tests/integ/modin/types/test_timedelta_indexing.py b/tests/integ/modin/types/test_timedelta_indexing.py index 4799e3b4f22..be6085364e0 100644 --- a/tests/integ/modin/types/test_timedelta_indexing.py +++ b/tests/integ/modin/types/test_timedelta_indexing.py @@ -18,7 +18,7 @@ @pytest.mark.parametrize( "key, iloc_join_count, loc_query_count, loc_join_count", [ - [2, 2, 2, 2], + [2, 0, 2, 2], [[2, 1], 2, 1, 1], [slice(1, None), 0, 1, 0], [[True, False, False, True], 1, 1, 1], @@ -56,16 +56,16 @@ def run_test(api, query_count, join_count): @pytest.mark.parametrize( "key, query_count, join_count, type_preserved", [ - [(1, 1), 1, 2, True], - [(2, 2), 1, 2, True], + [(1, 1), 1, 0, True], + [(2, 2), 1, 0, True], [([2, 1], 1), 1, 2, True], [ (2, [1, 0]), 1, - 4, + 0, True, ], # require transpose and keep result column type as timedelta - [(2, ...), 1, 4, False], # require transpose but lose the type + [(2, ...), 1, 0, False], # require transpose but lose the type [(slice(1, None), 0), 1, 0, True], [([True, False, False, True], 1), 1, 1, True], [(1, "a"), 2, 2, True], @@ -77,7 +77,7 @@ def run_test(api, query_count, join_count): 3, True, ], # require transpose and keep result column type as timedelta - [(2, ...), 1, 4, False], # require transpose but lose the type + [(2, ...), 1, 0, False], # require transpose but lose the type [(slice(1, None), "a"), 1, 0, True], [([True, False, False, True], "b"), 1, 1, True], ], @@ -427,7 +427,7 @@ def loc_enlargement(key, item, df): @pytest.mark.parametrize( "key, join_count", - [(2, 2), ([2, 1], 2), (slice(1, None), 0), ([True, False, False, True], 1)], + [(2, 0), ([2, 1], 2), (slice(1, None), 0), ([True, False, False, True], 1)], ) def test_index_get_timedelta(key, join_count): td_idx = native_pd.TimedeltaIndex( @@ -450,7 +450,7 @@ def test_index_get_timedelta(key, join_count): @pytest.mark.parametrize( "key, api, query_count, join_count", [ - [2, "iat", 1, 4], + [2, "iat", 1, 1], [native_pd.Timedelta("1 days 1 hour"), "at", 2, 4], [[2, 1], "iloc", 1, 4], [ @@ -494,7 +494,7 @@ def test_series_with_timedelta_index(key, api, query_count, join_count): @pytest.mark.parametrize( "key, api, query_count, join_count", [ - [2, "iat", 1, 4], + [2, "iat", 1, 1], [native_pd.Timedelta("1 days 1 hour"), "at", 2, 4], [[2, 1], "iloc", 1, 4], [