Skip to content

Commit 976490a

Browse files
committed
feat: add NumpyExtensionArray
1 parent 60ef822 commit 976490a

File tree

7 files changed

+297
-61
lines changed

7 files changed

+297
-61
lines changed

pandas-stubs/_typing.pyi

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -876,15 +876,15 @@ SliceType: TypeAlias = Hashable | None
876876
## All types below this point are only used in pandas-stubs
877877
######
878878

879-
BuiltinNotStrDtypeArg: TypeAlias = (
879+
BuiltinDtypeArg: TypeAlias = (
880880
BuiltinBooleanDtypeArg
881881
| BuiltinIntDtypeArg
882882
| BuiltinFloatDtypeArg
883883
| BuiltinComplexDtypeArg
884+
| BuiltinStrDtypeArg
884885
| BuiltinBytesDtypeArg
885886
| BuiltinObjectDtypeArg
886887
)
887-
BuiltinDtypeArg: TypeAlias = BuiltinNotStrDtypeArg | BuiltinStrDtypeArg
888888
NumpyNotTimeDtypeArg: TypeAlias = (
889889
NumpyBooleanDtypeArg
890890
| NumpyIntDtypeArg
@@ -923,6 +923,7 @@ np_ndarray_bool: TypeAlias = npt.NDArray[np.bool_]
923923
np_ndarray_str: TypeAlias = npt.NDArray[np.str_]
924924
np_ndarray_dt: TypeAlias = npt.NDArray[np.datetime64]
925925
np_ndarray_td: TypeAlias = npt.NDArray[np.timedelta64]
926+
np_ndarray_object: TypeAlias = npt.NDArray[np.object_]
926927

927928
# Define shape and generic type variables with defaults similar to numpy
928929
GenericT = TypeVar("GenericT", bound=np.generic, default=Any)
Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
from pandas.core.arraylike import OpsMixin
22
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
3+
from typing_extensions import Self
34

4-
class NumpyExtensionArray(OpsMixin, NDArrayBackedExtensionArray): ...
5+
from pandas._typing import np_ndarray
6+
7+
class NumpyExtensionArray(OpsMixin, NDArrayBackedExtensionArray):
8+
def __new__(cls, values: np_ndarray | Self, copy: bool = False) -> Self: ...

pandas-stubs/core/arrays/string_.pyi

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,16 @@ from typing import (
33
Literal,
44
)
55

6+
from pandas.core.arrays.base import ExtensionArray
67
from pandas.core.arrays.numpy_ import NumpyExtensionArray
8+
import pyarrow as pa
79
from typing_extensions import Self
810

911
from pandas._libs.missing import NAType
1012
from pandas._typing import (
11-
AnyArrayLike,
1213
DtypeArg,
14+
np_ndarray_object,
15+
np_ndarray_str,
1316
)
1417

1518
from pandas.core.dtypes.base import ExtensionDtype
@@ -25,7 +28,13 @@ class StringDtype(ExtensionDtype):
2528
@property
2629
def na_value(self) -> NAType | float: ...
2730

28-
class StringArray(NumpyExtensionArray):
29-
def __init__(self, values: AnyArrayLike, copy: bool = False) -> None: ...
30-
def __arrow_array__(self, type: DtypeArg | None = None) -> Any: ...
31+
class BaseStringArray(ExtensionArray):
32+
@property
33+
def dtype(self) -> StringDtype: ...
34+
35+
class StringArray(BaseStringArray, NumpyExtensionArray):
36+
def __new__(
37+
cls, values: np_ndarray_object | np_ndarray_str, copy: bool = False
38+
) -> Self: ...
39+
def __arrow_array__(self, type: DtypeArg | None = None) -> pa.StringArray: ...
3140
def __setitem__(self, key: Any, value: Any) -> None: ...

pandas-stubs/core/construction.pyi

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,16 @@ from pandas.core.arrays.boolean import BooleanArray
99
from pandas.core.arrays.floating import FloatingArray
1010
from pandas.core.arrays.integer import IntegerArray
1111
from pandas.core.arrays.numpy_ import NumpyExtensionArray
12-
from pandas.core.arrays.string_ import StringArray
12+
from pandas.core.arrays.string_ import (
13+
BaseStringArray,
14+
StringArray,
15+
)
1316
from pandas.core.indexes.range import RangeIndex
1417

1518
from pandas._libs.missing import NAType
1619
from pandas._libs.tslibs.nattype import NaTType
1720
from pandas._typing import (
18-
BuiltinNotStrDtypeArg,
21+
BuiltinDtypeArg,
1922
Just,
2023
NumpyNotTimeDtypeArg,
2124
PandasBooleanDtypeArg,
@@ -40,13 +43,13 @@ def array( # empty data, [float("nan")]
4043
@overload
4144
def array(
4245
data: SequenceNotStr[Any],
43-
dtype: BuiltinNotStrDtypeArg | NumpyNotTimeDtypeArg,
46+
dtype: BuiltinDtypeArg | NumpyNotTimeDtypeArg,
4447
copy: bool = True,
4548
) -> NumpyExtensionArray: ...
4649
@overload
4750
def array( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload]
4851
data: Sequence[NAType | NaTType | None],
49-
dtype: BuiltinNotStrDtypeArg | NumpyNotTimeDtypeArg | None = None,
52+
dtype: BuiltinDtypeArg | NumpyNotTimeDtypeArg | None = None,
5053
copy: bool = True,
5154
) -> NumpyExtensionArray: ...
5255
@overload
@@ -95,22 +98,22 @@ def array(
9598
) -> FloatingArray: ...
9699
@overload
97100
def array(
98-
data: SequenceNotStr[str | np.str_ | float | NAType | None],
101+
data: (
102+
SequenceNotStr[str | np.str_ | float | NAType | None]
103+
| np_ndarray
104+
| BaseStringArray
105+
),
99106
dtype: PandasStrDtypeArg,
100107
copy: bool = True,
101108
) -> StringArray: ...
102109
@overload
103110
def array(
104-
data: SequenceNotStr[str | np.str_ | NAType | None],
111+
data: (
112+
SequenceNotStr[str | np.str_ | NAType | None] | np_ndarray_str | BaseStringArray
113+
),
105114
dtype: None = None,
106115
copy: bool = True,
107-
) -> StringArray: ...
108-
@overload
109-
def array(
110-
data: np_ndarray_str | StringArray,
111-
dtype: PandasStrDtypeArg | None = None,
112-
copy: bool = True,
113-
) -> StringArray: ...
116+
) -> BaseStringArray: ...
114117
@overload
115118
def array(
116119
data: SequenceNotStr[Any],
@@ -120,6 +123,6 @@ def array(
120123
@overload
121124
def array(
122125
data: np_ndarray | NumpyExtensionArray | RangeIndex,
123-
dtype: BuiltinNotStrDtypeArg | NumpyNotTimeDtypeArg | None = None,
126+
dtype: BuiltinDtypeArg | NumpyNotTimeDtypeArg | None = None,
124127
copy: bool = True,
125128
) -> NumpyExtensionArray: ...
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from collections import UserList
2+
from collections.abc import (
3+
Callable,
4+
Sequence,
5+
)
6+
from typing import (
7+
TYPE_CHECKING,
8+
Any,
9+
)
10+
11+
import numpy as np
12+
import pandas as pd
13+
from pandas.core.arrays.string_ import BaseStringArray
14+
import pytest
15+
from typing_extensions import assert_type
16+
17+
from tests import check
18+
from tests.utils import powerset
19+
20+
21+
@pytest.mark.parametrize("typ", [list, tuple, UserList])
22+
@pytest.mark.parametrize("data", powerset(["🐼", np.str_("🐼")], 1))
23+
@pytest.mark.parametrize("missing_values", powerset([np.nan, None, pd.NA]))
24+
def test_construction_sequence(
25+
data: tuple[str | np.str_, ...],
26+
missing_values: tuple[Any, ...],
27+
typ: Callable[[Sequence[Any]], Sequence[Any]],
28+
) -> None:
29+
check(pd.array(typ([*data, *missing_values])), BaseStringArray)
30+
31+
if TYPE_CHECKING:
32+
assert_type(pd.array(["🐼", np.str_("🐼")]), BaseStringArray)
33+
34+
assert_type(pd.array(["🐼", np.str_("🐼"), None]), BaseStringArray)
35+
assert_type(pd.array(["🐼", np.str_("🐼"), pd.NA]), BaseStringArray)
36+
37+
assert_type(pd.array(["🐼", np.str_("🐼"), None, pd.NA]), BaseStringArray)
38+
39+
assert_type(pd.array(("🐼", np.str_("🐼"))), BaseStringArray)
40+
assert_type(pd.array(("🐼", np.str_("🐼"), pd.NA)), BaseStringArray)
41+
42+
assert_type(pd.array(UserList(["🐼", np.str_("🐼")])), BaseStringArray)
43+
44+
45+
def test_construction_array_like() -> None:
46+
np_arr = np.array(["🐼", np.str_("🐼")], np.str_)
47+
check(assert_type(pd.array(np_arr), BaseStringArray), BaseStringArray)
48+
49+
check(
50+
assert_type(pd.array(pd.array(["🐼", np.str_("🐼")])), BaseStringArray),
51+
BaseStringArray,
52+
)

0 commit comments

Comments
(0)