SNOW-2677419: Add support for resample functions in faster pandas (#3990)

sfc-gh-helmeleegy · web-flow · commit 377001a64382 · 2025-11-07T12:42:06.000-08:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -166,6 +166,19 @@
   - `groupby.resample`
   - `to_snowflake`
   - `to_snowpark`
+  - `resample.min`
+  - `resample.max`
+  - `resample.count`
+  - `resample.sum`
+  - `resample.mean`
+  - `resample.median`
+  - `resample.std`
+  - `resample.var`
+  - `resample.size`
+  - `resample.first`
+  - `resample.last`
+  - `resample.quantile`
+  - `resample.nunique`
 - Make faster pandas disabled by default (opt-in instead of opt-out).
 - Improve performance of `drop_duplicates` by avoiding joins when `keep!=False` in faster pandas.
 
diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -15752,6 +15752,42 @@ def resample(
         resample_method_args: tuple[Any],
         resample_method_kwargs: dict[str, Any],
         is_series: bool,
+    ) -> Union["SnowflakeQueryCompiler", collections.defaultdict[Hashable, list]]:
+        """
+        Wrapper around _resample_internal to be supported in faster pandas.
+        """
+        relaxed_query_compiler = None
+        if self._relaxed_query_compiler is not None:
+            result = self._relaxed_query_compiler._resample_internal(
+                resample_kwargs=resample_kwargs,
+                resample_method=resample_method,
+                resample_method_args=resample_method_args,
+                resample_method_kwargs=resample_method_kwargs,
+                is_series=is_series,
+            )
+            if isinstance(result, SnowflakeQueryCompiler):
+                relaxed_query_compiler = result
+            else:
+                return result
+        result = self._resample_internal(
+            resample_kwargs=resample_kwargs,
+            resample_method=resample_method,
+            resample_method_args=resample_method_args,
+            resample_method_kwargs=resample_method_kwargs,
+            is_series=is_series,
+        )
+        if isinstance(result, SnowflakeQueryCompiler):
+            return self._maybe_set_relaxed_qc(result, relaxed_query_compiler)
+        else:
+            return result
+
+    def _resample_internal(
+        self,
+        resample_kwargs: dict[str, Any],
+        resample_method: AggFuncType,
+        resample_method_args: tuple[Any],
+        resample_method_kwargs: dict[str, Any],
+        is_series: bool,
     ) -> Union["SnowflakeQueryCompiler", collections.defaultdict[Hashable, list]]:
         """
         Return new SnowflakeQueryCompiler whose ordered frame holds the result of a resample operation.
diff --git a/tests/integ/modin/test_faster_pandas.py b/tests/integ/modin/test_faster_pandas.py
@@ -1029,6 +1029,63 @@ def test_rename(session):
         assert_frame_equal(snow_result, native_result)
 
 
+@pytest.mark.parametrize(
+    "func",
+    [
+        "max",
+        "min",
+        "mean",
+        "median",
+        "sum",
+        "std",
+        "var",
+        "count",
+        "size",
+        "first",
+        "last",
+        "quantile",
+        "nunique",
+    ],
+)
+@sql_count_checker(query_count=5, join_count=1)
+def test_resample(session, func):
+    with session_parameter_override(
+        session, "dummy_row_pos_optimization_enabled", True
+    ):
+        # create a temp table: 15 hourly rows, datetime index saved as the "index" column
+        table_name = Utils.random_name_for_temp_object(TempObjectType.TABLE)
+        session.create_dataframe(
+            native_pd.DataFrame(
+                {"A": np.random.randn(15)},
+                index=native_pd.date_range("2020-01-01", periods=15, freq="1h"),
+            ).reset_index(drop=False)
+        ).write.save_as_table(table_name, table_type="temp")
+
+        # read the table into a snow dataframe and apply `func` over 2h left-closed bins
+        df = pd.read_snowflake(table_name, index_col="index")
+        snow_result = getattr(df.resample(rule="2h", closed="left"), func)()
+
+        # verify that the input dataframe has a populated relaxed query compiler
+        assert df._query_compiler._relaxed_query_compiler is not None
+        assert df._query_compiler._relaxed_query_compiler._dummy_row_pos_mode is True
+        # verify that the output dataframe also has a populated relaxed query compiler
+        assert snow_result._query_compiler._relaxed_query_compiler is not None
+        assert (
+            snow_result._query_compiler._relaxed_query_compiler._dummy_row_pos_mode
+            is True
+        )
+
+        # compute the expected result with native pandas on the same materialized data
+        native_df = df.to_pandas()
+        native_result = getattr(native_df.resample(rule="2h", closed="left"), func)()
+
+        # compare results: `size` is checked as a Series, every other func as a DataFrame
+        if func == "size":
+            assert_series_equal(snow_result, native_result, check_freq=False)
+        else:
+            assert_frame_equal(snow_result, native_result, check_freq=False)
+
+
 @pytest.mark.parametrize(
     "func",
     [