Skip to content

Commit 1a2aa76

Browse files
committed
feat: add ExtensionArrays
1 parent def7c89 commit 1a2aa76

File tree

8 files changed

+225
-16
lines changed

8 files changed

+225
-16
lines changed

pandas-stubs/core/algorithms.pyi

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ def unique(values: IntervalIndex[IntervalT]) -> IntervalIndex[IntervalT]: ...
3939
@overload
4040
def unique(values: PeriodIndex) -> PeriodIndex: ...
4141
@overload
42-
# switch to DatetimeIndex after Pandas 3.0
42+
# TODO: switch to DatetimeIndex after Pandas 3.0 pandas-dev/pandas#57064
4343
def unique(values: DatetimeIndex) -> np_1darray_dt | DatetimeIndex: ...
4444
@overload
45-
# switch to TimedeltaIndex after Pandas 3.0
45+
# TODO: switch to TimedeltaIndex after Pandas 3.0 pandas-dev/pandas#57064
4646
def unique(values: TimedeltaIndex) -> np_1darray_td: ...
4747
@overload
4848
# TODO: switch to Index[int] after Pandas 3.0
@@ -59,7 +59,8 @@ else:
5959
) -> np_ndarray: ...
6060

6161
@overload
62-
def unique(values: Index) -> np_1darray | Index: ... # switch to Index after Pandas 3.0
62+
# TODO: switch to Index after Pandas 3.0 pandas-dev/pandas#57064
63+
def unique(values: Index) -> np_1darray | Index: ...
6364
@overload
6465
def unique(values: Categorical) -> Categorical: ...
6566

pandas-stubs/core/arrays/__init__.pyi

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,33 @@
1-
from pandas.core.arrays.base import ExtensionArray as ExtensionArray
2-
from pandas.core.arrays.boolean import BooleanArray as BooleanArray
3-
from pandas.core.arrays.categorical import Categorical as Categorical
4-
from pandas.core.arrays.datetimes import DatetimeArray as DatetimeArray
5-
from pandas.core.arrays.integer import IntegerArray as IntegerArray
6-
from pandas.core.arrays.interval import IntervalArray as IntervalArray
7-
from pandas.core.arrays.numpy_ import NumpyExtensionArray as NumpyExtensionArray
8-
from pandas.core.arrays.period import PeriodArray as PeriodArray
9-
from pandas.core.arrays.sparse import SparseArray as SparseArray
10-
from pandas.core.arrays.string_ import StringArray as StringArray
11-
from pandas.core.arrays.timedeltas import TimedeltaArray as TimedeltaArray
1+
from pandas.core.arrays.arrow import ArrowExtensionArray
2+
from pandas.core.arrays.base import ExtensionArray
3+
from pandas.core.arrays.boolean import BooleanArray
4+
from pandas.core.arrays.categorical import Categorical
5+
from pandas.core.arrays.datetimes import DatetimeArray
6+
from pandas.core.arrays.floating import FloatingArray
7+
from pandas.core.arrays.integer import IntegerArray
8+
from pandas.core.arrays.interval import IntervalArray
9+
from pandas.core.arrays.masked import BaseMaskedArray
10+
from pandas.core.arrays.numpy_ import NumpyExtensionArray
11+
from pandas.core.arrays.period import PeriodArray
12+
from pandas.core.arrays.sparse import SparseArray
13+
from pandas.core.arrays.string_ import StringArray
14+
from pandas.core.arrays.string_arrow import ArrowStringArray
15+
from pandas.core.arrays.timedeltas import TimedeltaArray
16+
17+
__all__ = [
18+
"ArrowExtensionArray",
19+
"ArrowStringArray",
20+
"BaseMaskedArray",
21+
"BooleanArray",
22+
"Categorical",
23+
"DatetimeArray",
24+
"ExtensionArray",
25+
"FloatingArray",
26+
"IntegerArray",
27+
"IntervalArray",
28+
"NumpyExtensionArray",
29+
"PeriodArray",
30+
"SparseArray",
31+
"StringArray",
32+
"TimedeltaArray",
33+
]

pandas-stubs/core/construction.pyi

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,38 +2,59 @@ from collections.abc import (
22
MutableSequence,
33
Sequence,
44
)
5-
from datetime import datetime
5+
from datetime import (
6+
datetime,
7+
timedelta,
8+
)
9+
import sys
610
from typing import (
711
Any,
812
TypeAlias,
913
overload,
1014
)
1115

1216
import numpy as np
17+
from pandas.core.arrays.base import ExtensionArray
1318
from pandas.core.arrays.boolean import BooleanArray
19+
from pandas.core.arrays.categorical import Categorical
1420
from pandas.core.arrays.datetimes import DatetimeArray
1521
from pandas.core.arrays.floating import FloatingArray
1622
from pandas.core.arrays.integer import IntegerArray
23+
from pandas.core.arrays.interval import IntervalArray
1724
from pandas.core.arrays.numpy_ import NumpyExtensionArray
25+
from pandas.core.arrays.period import PeriodArray
26+
from pandas.core.arrays.sparse.array import SparseArray
1827
from pandas.core.arrays.string_ import (
1928
BaseStringArray,
2029
StringArray,
2130
StringDtype,
2231
)
2332
from pandas.core.arrays.string_arrow import ArrowStringArray
2433
from pandas.core.arrays.timedeltas import TimedeltaArray
34+
from pandas.core.indexes.base import Index
35+
from pandas.core.indexes.category import CategoricalIndex
36+
from pandas.core.indexes.datetimes import DatetimeIndex
37+
from pandas.core.indexes.interval import IntervalIndex
38+
from pandas.core.indexes.period import PeriodIndex
2539
from pandas.core.indexes.range import RangeIndex
2640
from pandas.core.indexes.timedeltas import TimedeltaIndex
2741
from pandas.core.series import Series
2842
from typing_extensions import Never
2943

44+
from pandas._libs.interval import Interval
3045
from pandas._libs.missing import NAType
46+
from pandas._libs.sparse import SparseIndex
3147
from pandas._libs.tslibs.nattype import NaTType
48+
from pandas._libs.tslibs.period import Period
3249
from pandas._libs.tslibs.timedeltas import Timedelta
50+
from pandas._libs.tslibs.timestamps import Timestamp
3351
from pandas._typing import (
3452
BuiltinNotStrDtypeArg,
53+
CategoryDtypeArg,
54+
IntervalT,
3555
Just,
3656
NumpyNotTimeDtypeArg,
57+
NumpyTimedeltaDtypeArg,
3758
NumpyTimestampDtypeArg,
3859
PandasBaseStrDtypeArg,
3960
PandasBooleanDtypeArg,
@@ -51,9 +72,15 @@ from pandas._typing import (
5172
np_ndarray_dt,
5273
np_ndarray_float,
5374
np_ndarray_str,
75+
np_ndarray_td,
5476
)
5577

56-
from pandas.core.dtypes.dtypes import DatetimeTZDtype
78+
from pandas.core.dtypes.base import ExtensionDtype
79+
from pandas.core.dtypes.dtypes import (
80+
DatetimeTZDtype,
81+
IntervalDtype,
82+
PeriodDtype,
83+
)
5784

5885
_NAStrElement: TypeAlias = str | np.str_ | NAType | None
5986
_NaNStrElement: TypeAlias = Just[float] | _NAStrElement
@@ -85,6 +112,51 @@ def array( # type: ignore[overload-overlap] # pyright: ignore[reportOverlapping
85112
dtype: BuiltinNotStrDtypeArg | NumpyNotTimeDtypeArg | None = None,
86113
copy: bool = True,
87114
) -> NumpyExtensionArray: ...
115+
@overload
116+
def array( # type: ignore[overload-overlap]
117+
data: MutableSequence[Any] | np_ndarray | ExtensionArray | Index | Series,
118+
dtype: CategoryDtypeArg,
119+
copy: bool = True,
120+
) -> Categorical: ...
121+
@overload
122+
def array(
123+
# TODO: also accept a categorical Series here, see pandas-dev/pandas-stubs#1415
124+
data: Categorical | CategoricalIndex,
125+
dtype: CategoryDtypeArg | None = None,
126+
copy: bool = True,
127+
) -> Categorical: ...
128+
@overload
129+
def array( # type: ignore[overload-overlap]
130+
data: (
131+
Sequence[Period | NaTType | None] | PeriodArray | PeriodIndex | Series[Period]
132+
),
133+
dtype: PeriodDtype | None = None,
134+
copy: bool = True,
135+
) -> PeriodArray: ...
136+
@overload
137+
def array( # type: ignore[overload-overlap]
138+
# TODO: float("nan") is also accepted at runtime, but it is omitted here because NaN cannot be typed without admitting every float
139+
data: Sequence[IntervalT | None] | IntervalArray | IntervalIndex | Series[Interval],
140+
dtype: IntervalDtype | None = None,
141+
copy: bool = True,
142+
) -> IntervalArray: ...
143+
144+
if sys.version_info >= (3, 11):
145+
@overload
146+
def array(
147+
data: SparseArray | SparseIndex,
148+
dtype: str | np.dtype | ExtensionDtype | None = None,
149+
copy: bool = True,
150+
) -> SparseArray: ...
151+
152+
else:
153+
@overload
154+
def array(
155+
data: SparseArray | SparseIndex,
156+
dtype: str | np.dtype[Any] | ExtensionDtype | None = None,
157+
copy: bool = True,
158+
) -> SparseArray: ...
159+
88160
@overload
89161
def array( # pyright: ignore[reportOverlappingOverload]
90162
data: (
@@ -169,6 +241,31 @@ def array( # type: ignore[overload-overlap]
169241
copy: bool = True,
170242
) -> DatetimeArray: ...
171243
@overload
244+
def array( # type: ignore[overload-overlap]
245+
data: ( # TODO: merge the two Sequence's after 3.0 pandas-dev/pandas#57064
246+
Sequence[datetime | NaTType | None]
247+
| Sequence[np.datetime64 | NaTType | None]
248+
| np_ndarray_dt
249+
| DatetimeArray
250+
| DatetimeIndex
251+
| Series[Timestamp]
252+
),
253+
dtype: PandasTimestampDtypeArg | NumpyTimestampDtypeArg | None = None,
254+
copy: bool = True,
255+
) -> DatetimeArray: ...
256+
@overload
257+
def array( # type: ignore[overload-overlap]
258+
data: (
259+
Sequence[timedelta | np.timedelta64 | NaTType | None]
260+
| np_ndarray_td
261+
| TimedeltaArray
262+
| TimedeltaIndex
263+
| Series[Timedelta]
264+
),
265+
dtype: NumpyTimedeltaDtypeArg | None = None,
266+
copy: bool = True,
267+
) -> TimedeltaArray: ...
268+
@overload
172269
def array( # type: ignore[overload-overlap]
173270
data: _NaNStrData, dtype: StringDtype[Never], copy: bool = True
174271
) -> BaseStringArray: ...

tests/_typing.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# ruff: noqa: PLC0414
12
# This file serves as a stub file for static type checkers
23
# (pyright does not like it if I call the file tests/_typing.pyi).
34
# It can only import from pandas._typing.

tests/arrays/test_datetime_array.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from typing import (
1212
TYPE_CHECKING,
1313
Any,
14+
cast,
1415
)
1516
from zoneinfo import ZoneInfo
1617

@@ -276,3 +277,26 @@ def test_properties() -> None:
276277
np_1darray,
277278
np.float64,
278279
)
280+
281+
282+
def test_constructor() -> None:
283+
dt = datetime(2025, 11, 10)
284+
check(assert_type(pd.array([dt]), DatetimeArray), DatetimeArray)
285+
check(assert_type(pd.array([dt, pd.Timestamp(dt)]), DatetimeArray), DatetimeArray)
286+
check(assert_type(pd.array([dt, None]), DatetimeArray), DatetimeArray)
287+
check(assert_type(pd.array([dt, pd.NaT, None]), DatetimeArray), DatetimeArray)
288+
289+
np_dt = np.datetime64(dt)
290+
check(assert_type(pd.array([np_dt]), DatetimeArray), DatetimeArray)
291+
check(assert_type(pd.array([np_dt, None]), DatetimeArray), DatetimeArray)
292+
dt_nat = cast(list[np.datetime64 | NaTType], [np_dt, pd.NaT])
293+
check(assert_type(pd.array(dt_nat), DatetimeArray), DatetimeArray)
294+
295+
np_arr = np.array([dt], np.datetime64)
296+
check(assert_type(pd.array(np_arr), DatetimeArray), DatetimeArray)
297+
298+
check(assert_type(pd.array(pd.array([dt])), DatetimeArray), DatetimeArray)
299+
300+
check(assert_type(pd.array(pd.Index([dt])), DatetimeArray), DatetimeArray)
301+
302+
check(assert_type(pd.array(pd.Series([dt])), DatetimeArray), DatetimeArray)

tests/arrays/test_interval_array.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import pandas as pd
2+
from pandas.core.arrays.interval import IntervalArray
3+
from typing_extensions import assert_type
4+
5+
from tests import check
6+
7+
8+
def test_constructor() -> None:
9+
itv = pd.Interval(0, 1)
10+
check(assert_type(pd.array([itv]), IntervalArray), IntervalArray)
11+
check(assert_type(pd.array([itv, None]), IntervalArray), IntervalArray)
12+
13+
check(assert_type(pd.array(pd.array([itv])), IntervalArray), IntervalArray)
14+
15+
check(assert_type(pd.array(pd.Index([itv])), IntervalArray), IntervalArray)
16+
17+
check(assert_type(pd.array(pd.Series([itv])), IntervalArray), IntervalArray)

tests/arrays/test_period_array.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import pandas as pd
2+
from pandas.core.arrays.period import PeriodArray
3+
from typing_extensions import assert_type
4+
5+
from tests import check
6+
7+
8+
def test_constructor() -> None:
9+
prd = pd.Period("2023-01-01")
10+
check(assert_type(pd.array([prd]), PeriodArray), PeriodArray)
11+
check(assert_type(pd.array([prd, None]), PeriodArray), PeriodArray)
12+
check(assert_type(pd.array([prd, pd.NaT, None]), PeriodArray), PeriodArray)
13+
14+
check(assert_type(pd.array(pd.array([prd])), PeriodArray), PeriodArray)
15+
16+
check(assert_type(pd.array(pd.Index([prd])), PeriodArray), PeriodArray)
17+
18+
check(assert_type(pd.array(pd.Series([prd])), PeriodArray), PeriodArray)

tests/arrays/test_timedelta_array.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from datetime import timedelta
2+
3+
import numpy as np
4+
import pandas as pd
5+
from pandas.core.arrays.timedeltas import TimedeltaArray
6+
from typing_extensions import assert_type
7+
8+
from tests import check
9+
10+
11+
def test_constructor() -> None:
12+
td = timedelta(2025, 11, 10)
13+
np_dt = np.timedelta64(td)
14+
check(assert_type(pd.array([td]), TimedeltaArray), TimedeltaArray)
15+
check(
16+
assert_type(pd.array([td, pd.Timedelta(td), np_dt]), TimedeltaArray),
17+
TimedeltaArray,
18+
)
19+
check(assert_type(pd.array([td, None]), TimedeltaArray), TimedeltaArray)
20+
check(assert_type(pd.array([td, pd.NaT, None]), TimedeltaArray), TimedeltaArray)
21+
22+
np_arr = np.array([td], np.timedelta64)
23+
check(assert_type(pd.array(np_arr), TimedeltaArray), TimedeltaArray)
24+
25+
check(assert_type(pd.array(pd.array([td])), TimedeltaArray), TimedeltaArray)
26+
27+
check(assert_type(pd.array(pd.Index([td])), TimedeltaArray), TimedeltaArray)
28+
29+
check(assert_type(pd.array(pd.Series([td])), TimedeltaArray), TimedeltaArray)

0 commit comments

Comments
 (0)