Skip to content

Commit

Permalink
Move isinstance_cudf_pandas to fast_slow_proxy (#17875)
Browse files Browse the repository at this point in the history
https://github.com/rapidsai/cudf/pull/17629/files#diff-8731197057aec7c2ece5535ff5fb740a7d2109b213bb859ccd19290d40b7b703R11 broke a number of cuml pytests.

This happened because `pandas_compatible` mode is set when `._wrappers.pandas` is imported, and the import introduced in the above PR triggered that. This PR fixes the problem by moving `isinstance_cudf_pandas` to the `fast_slow_proxy` module (where it is renamed to `is_proxy_instance`).

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #17875
  • Loading branch information
galipremsagar authored Jan 30, 2025
1 parent ca06c39 commit f949dee
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 31 deletions.
14 changes: 7 additions & 7 deletions docs/cudf/source/cudf_pandas/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,24 +144,24 @@ provides a similar configuration-based plugin for Spark.

## How do I know if an object is a `cudf.pandas` proxy object?

To determine if an object is a `cudf.pandas` proxy object, you can use the `isinstance_cudf_pandas` API. This function checks if the given object is a proxy object that wraps either a `cudf` or `pandas` object. Here is an example of how to use this API:
To determine if an object is a `cudf.pandas` proxy object, you can use the `is_proxy_instance` API. This function checks if the given object is a proxy object that wraps either a `cudf` or `pandas` object. Here is an example of how to use this API:

```python
from cudf.pandas import isinstance_cudf_pandas
from cudf.pandas import is_proxy_instance

obj = ... # Your object here
if isinstance_cudf_pandas(obj, pd.Series):
if is_proxy_instance(obj, pd.Series):
print("The object is a cudf.pandas proxy Series object.")
else:
print("The object is not a cudf.pandas proxy Series object.")
```

To detect `Series`, `DataFrame`, `Index`, and `ndarray` objects separately, you can pass the corresponding type as the second parameter:

* `isinstance_cudf_pandas(obj, pd.Series)`: Detects if the object is a `cudf.pandas` proxy `Series`.
* `isinstance_cudf_pandas(obj, pd.DataFrame)`: Detects if the object is a `cudf.pandas` proxy `DataFrame`.
* `isinstance_cudf_pandas(obj, pd.Index)`: Detects if the object is a `cudf.pandas` proxy `Index`.
* `isinstance_cudf_pandas(obj, np.ndarray)`: Detects if the object is a `cudf.pandas` proxy `ndarray`.
* `is_proxy_instance(obj, pd.Series)`: Detects if the object is a `cudf.pandas` proxy `Series`.
* `is_proxy_instance(obj, pd.DataFrame)`: Detects if the object is a `cudf.pandas` proxy `DataFrame`.
* `is_proxy_instance(obj, pd.Index)`: Detects if the object is a `cudf.pandas` proxy `Index`.
* `is_proxy_instance(obj, np.ndarray)`: Detects if the object is a `cudf.pandas` proxy `ndarray`.

## How can I access the underlying GPU or CPU objects?

Expand Down
11 changes: 8 additions & 3 deletions python/cudf/cudf/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,17 @@
import pylibcudf
import rmm.mr

from ._wrappers.pandas import isinstance_cudf_pandas
from .fast_slow_proxy import is_proxy_object
from .fast_slow_proxy import is_proxy_instance, is_proxy_object
from .magics import load_ipython_extension
from .profiler import Profiler

__all__ = ["Profiler", "install", "is_proxy_object", "load_ipython_extension"]
__all__ = [
"Profiler",
"install",
"is_proxy_instance",
"is_proxy_object",
"load_ipython_extension",
]


LOADED = False
Expand Down
7 changes: 0 additions & 7 deletions python/cudf/cudf/pandas/_wrappers/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
_FunctionProxy,
_maybe_wrap_result,
_Unusable,
is_proxy_object,
make_final_proxy_type as _make_final_proxy_type,
make_intermediate_proxy_type as _make_intermediate_proxy_type,
register_proxy_func,
Expand Down Expand Up @@ -70,8 +69,6 @@
except ImportError:
ipython_shell = None

cudf.set_option("mode.pandas_compatible", True)


def _pandas_util_dir():
# In pandas 2.0, pandas.util contains public APIs under
Expand Down Expand Up @@ -1713,10 +1710,6 @@ def holiday_calendar_factory_wrapper(*args, **kwargs):
)


def isinstance_cudf_pandas(obj, type):
    """Check whether ``obj`` is a proxy object named like ``type``.

    The check is name-based: ``obj`` must satisfy ``is_proxy_object`` and
    the ``__name__`` of its class must equal ``type.__name__``. No
    ``isinstance`` relationship between ``obj`` and ``type`` is tested.

    Parameters
    ----------
    obj
        The object to inspect.
    type
        The class whose ``__name__`` is compared against the class name
        of ``obj``. (The parameter name shadows the builtin ``type``; it
        is kept unchanged for caller compatibility.)

    Returns
    -------
    The falsy result of ``is_proxy_object(obj)`` when ``obj`` is not a
    proxy object, otherwise the boolean result of the name comparison.
    """
    is_proxy = is_proxy_object(obj)
    if is_proxy:
        return obj.__class__.__name__ == type.__name__
    # Not a proxy: hand back the (falsy) proxy check result unchanged.
    return is_proxy


# timestamps and timedeltas are not proxied, but non-proxied
# pandas types are currently not picklable. Thus, we define
# custom reducer/unpicker functions for these types:
Expand Down
4 changes: 4 additions & 0 deletions python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1335,6 +1335,10 @@ def _get_proxy_base_class(cls):
return object


def is_proxy_instance(obj, type):
    """Return whether ``obj`` is a proxy object whose class is named like ``type``.

    ``obj`` is first tested with ``is_proxy_object``. If that passes, the
    name of ``obj``'s class is compared with ``type.__name__``. Only the
    names are compared; this is not an ``isinstance`` check against
    ``type``.

    Parameters
    ----------
    obj
        The object to test.
    type
        The class to match by name, e.g. ``pd.Series``. (This parameter
        shadows the builtin ``type`` inside the function; the name is
        kept so existing keyword callers continue to work.)

    Returns
    -------
    The falsy result of ``is_proxy_object(obj)`` if ``obj`` is not a proxy
    object; otherwise ``True`` when the class names match and ``False``
    when they do not.
    """
    proxied = is_proxy_object(obj)
    if not proxied:
        # Short-circuit exactly as ``and`` would: return the falsy value.
        return proxied
    observed_name = obj.__class__.__name__
    expected_name = type.__name__
    return observed_name == expected_name


# Module-level set of proxy base class types. As written here it contains
# only ProxyNDarrayBase.
# NOTE(review): presumably consulted when resolving a proxy type's base
# class; confirm against the callers, which are not visible in this hunk.
PROXY_BASE_CLASSES: set[type] = {
ProxyNDarrayBase,
}
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/pandas/module_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,9 +595,10 @@ def install(
)
mode = deduce_cudf_pandas_mode(slow_lib, fast_lib)
if mode.use_fast_lib:
importlib.import_module(
pandas_wrappers = importlib.import_module(
f".._wrappers.{mode.slow_lib}", __name__
)
pandas_wrappers.cudf.set_option("mode.pandas_compatible", True)
try:
(self,) = (
p
Expand Down
26 changes: 13 additions & 13 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
)

from cudf.pandas import (
isinstance_cudf_pandas,
is_proxy_instance,
)

# Accelerated pandas has the real pandas and cudf modules as attributes
Expand Down Expand Up @@ -1902,23 +1902,23 @@ def test_is_cudf_pandas():
df = xpd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
index = xpd.Index([1, 2, 3])

assert isinstance_cudf_pandas(s, pd.Series)
assert isinstance_cudf_pandas(df, pd.DataFrame)
assert isinstance_cudf_pandas(index, pd.Index)
assert isinstance_cudf_pandas(index.values, np.ndarray)
assert is_proxy_instance(s, pd.Series)
assert is_proxy_instance(df, pd.DataFrame)
assert is_proxy_instance(index, pd.Index)
assert is_proxy_instance(index.values, np.ndarray)

for obj in [s, df, index, index.values]:
assert not isinstance_cudf_pandas(obj._fsproxy_slow, pd.Series)
assert not isinstance_cudf_pandas(obj._fsproxy_fast, pd.Series)
assert not is_proxy_instance(obj._fsproxy_slow, pd.Series)
assert not is_proxy_instance(obj._fsproxy_fast, pd.Series)

assert not isinstance_cudf_pandas(obj._fsproxy_slow, pd.DataFrame)
assert not isinstance_cudf_pandas(obj._fsproxy_fast, pd.DataFrame)
assert not is_proxy_instance(obj._fsproxy_slow, pd.DataFrame)
assert not is_proxy_instance(obj._fsproxy_fast, pd.DataFrame)

assert not isinstance_cudf_pandas(obj._fsproxy_slow, pd.Index)
assert not isinstance_cudf_pandas(obj._fsproxy_fast, pd.Index)
assert not is_proxy_instance(obj._fsproxy_slow, pd.Index)
assert not is_proxy_instance(obj._fsproxy_fast, pd.Index)

assert not isinstance_cudf_pandas(obj._fsproxy_slow, np.ndarray)
assert not isinstance_cudf_pandas(obj._fsproxy_fast, np.ndarray)
assert not is_proxy_instance(obj._fsproxy_slow, np.ndarray)
assert not is_proxy_instance(obj._fsproxy_fast, np.ndarray)


def test_series_dtype_property():
Expand Down

0 comments on commit f949dee

Please sign in to comment.