Skip to content

Commit 8eec135

Browse files
committed
Merge branch 'main' of github.com:pandas-dev/pandas into translations
2 parents 17063a7 + f496acf commit 8eec135

26 files changed

+67
-82
lines changed

ci/deps/actions-310-minimum_versions.yaml

-1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@ dependencies:
2626

2727
# optional dependencies
2828
- beautifulsoup4=4.12.3
29-
- blosc=1.21.3
3029
- bottleneck=1.3.6
3130
- fastparquet=2024.2.0
3231
- fsspec=2024.2.0

ci/deps/actions-310.yaml

+1-2
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@ dependencies:
2424

2525
# optional dependencies
2626
- beautifulsoup4>=4.12.3
27-
- blosc>=1.21.3
2827
- bottleneck>=1.3.6
2928
- fastparquet>=2024.2.0
3029
- fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
5251
- scipy>=1.12.0
5352
- sqlalchemy>=2.0.0
5453
- tabulate>=0.9.0
55-
- xarray>=2024.1.1, <=2024.9.0
54+
- xarray>=2024.1.1
5655
- xlrd>=2.0.1
5756
- xlsxwriter>=3.2.0
5857
- zstandard>=0.22.0

ci/deps/actions-311-downstream_compat.yaml

+1-4
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@ dependencies:
2525

2626
# optional dependencies
2727
- beautifulsoup4>=4.12.3
28-
- blosc>=1.21.3
2928
- bottleneck>=1.3.6
3029
- fastparquet>=2024.2.0
3130
- fsspec>=2024.2.0
@@ -53,7 +52,7 @@ dependencies:
5352
- scipy>=1.12.0
5453
- sqlalchemy>=2.0.0
5554
- tabulate>=0.9.0
56-
- xarray>=2024.1.1, <=2024.9.0
55+
- xarray>=2024.1.1
5756
- xlrd>=2.0.1
5857
- xlsxwriter>=3.2.0
5958
- zstandard>=0.22.0
@@ -63,14 +62,12 @@ dependencies:
6362
- cftime
6463
- dask
6564
- ipython
66-
- geopandas-base
6765
- seaborn
6866
- scikit-learn
6967
- statsmodels
7068
- coverage
7169
- pandas-datareader
7270
- pyyaml
73-
- py
7471
- pip:
7572
- adbc-driver-postgresql>=0.10.0
7673
- adbc-driver-sqlite>=0.8.0

ci/deps/actions-311.yaml

+1-2
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@ dependencies:
2424

2525
# optional dependencies
2626
- beautifulsoup4>=4.12.3
27-
- blosc>=1.21.3
2827
- bottleneck>=1.3.6
2928
- fastparquet>=2024.2.0
3029
- fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
5251
- scipy>=1.12.0
5352
- sqlalchemy>=2.0.0
5453
- tabulate>=0.9.0
55-
- xarray>=2024.1.1, <=2024.9.0
54+
- xarray>=2024.1.1
5655
- xlrd>=2.0.1
5756
- xlsxwriter>=3.2.0
5857
- zstandard>=0.22.0

ci/deps/actions-312.yaml

+1-2
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@ dependencies:
2424

2525
# optional dependencies
2626
- beautifulsoup4>=4.12.3
27-
- blosc>=1.21.3
2827
- bottleneck>=1.3.6
2928
- fastparquet>=2024.2.0
3029
- fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
5251
- scipy>=1.12.0
5352
- sqlalchemy>=2.0.0
5453
- tabulate>=0.9.0
55-
- xarray>=2024.1.1, <=2024.9.0
54+
- xarray>=2024.1.1
5655
- xlrd>=2.0.1
5756
- xlsxwriter>=3.2.0
5857
- zstandard>=0.22.0

ci/deps/actions-313.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ dependencies:
5252
- scipy>=1.12.0
5353
- sqlalchemy>=2.0.0
5454
- tabulate>=0.9.0
55-
- xarray>=2024.1.1, <=2024.9.0
55+
- xarray>=2024.1.1
5656
- xlrd>=2.0.1
5757
- xlsxwriter>=3.2.0
5858
- zstandard>=0.22.0

doc/cheatsheet/Pandas_Cheat_Sheet.pdf

63.4 KB
Binary file not shown.
187 KB
Binary file not shown.

doc/cheatsheet/README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ This cheat sheet, originally written by Irv Lustig, [Princeton Consultants](http
1212
| Pandas_Cheat_Sheet_JA | Japanese | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
1313
| Pandas_Cheat_Sheet_FA | Persian | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_FA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_FA.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
1414

15-
15+
The English version has additional material that is not in the versions in other languages.
1616

1717
**Alternative**
1818

doc/source/getting_started/install.rst

-1
Original file line number | Diff line number | Diff line change
@@ -305,7 +305,6 @@ Installable with ``pip install "pandas[hdf5, parquet, feather, spss, excel]"``
305305
Dependency Minimum Version pip extra Notes
306306
====================================================== ================== ================ ==========================================================
307307
`PyTables <https://github.com/PyTables/PyTables>`__ 3.8.0 hdf5 HDF5-based reading / writing
308-
`blosc <https://github.com/Blosc/c-blosc>`__ 1.21.3 hdf5 Compression for HDF5; only available on ``conda``
309308
`zlib <https://github.com/madler/zlib>`__ hdf5 Compression for HDF5
310309
`fastparquet <https://github.com/dask/fastparquet>`__ 2024.2.0 - Parquet reading / writing (pyarrow is default)
311310
`pyarrow <https://github.com/apache/arrow>`__ 10.0.1 parquet, feather Parquet, ORC, and feather reading / writing

doc/source/getting_started/intro_tutorials/includes/titanic.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ This tutorial uses the Titanic data set, stored as CSV. The data
1111
consists of the following data columns:
1212

1313
- PassengerId: Id of every passenger.
14-
- Survived: Indication whether passenger survived. ``0`` for yes and ``1`` for no.
14+
- Survived: Indication whether passenger survived. ``0`` for no and ``1`` for yes.
1515
- Pclass: One out of the 3 ticket classes: Class ``1``, Class ``2`` and Class ``3``.
1616
- Name: Name of passenger.
1717
- Sex: Gender of passenger.

environment.yml

+1-4
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ dependencies:
2727

2828
# optional dependencies
2929
- beautifulsoup4>=4.12.3
30-
- blosc
3130
- bottleneck>=1.3.6
3231
- fastparquet>=2024.2.0
3332
- fsspec>=2024.2.0
@@ -55,7 +54,7 @@ dependencies:
5554
- scipy>=1.12.0
5655
- sqlalchemy>=2.0.0
5756
- tabulate>=0.9.0
58-
- xarray>=2024.1.1, <=2024.9.0
57+
- xarray>=2024.1.1
5958
- xlrd>=2.0.1
6059
- xlsxwriter>=3.2.0
6160
- zstandard>=0.22.0
@@ -83,8 +82,6 @@ dependencies:
8382

8483
# documentation
8584
- gitpython # obtain contributors from git for whatsnew
86-
- gitdb
87-
- google-auth
8885
- natsort # DataFrame.sort_values doctest
8986
- numpydoc
9087
- pydata-sphinx-theme=0.16

pandas/compat/_optional.py

-1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@
2323
"adbc-driver-postgresql": "0.10.0",
2424
"adbc-driver-sqlite": "0.8.0",
2525
"bs4": "4.12.3",
26-
"blosc": "1.21.3",
2726
"bottleneck": "1.3.6",
2827
"fastparquet": "2024.2.0",
2928
"fsspec": "2024.2.0",

pandas/core/arrays/string_.py

+4-7
Original file line number | Diff line number | Diff line change
@@ -123,10 +123,10 @@ class StringDtype(StorageExtensionDtype):
123123
Examples
124124
--------
125125
>>> pd.StringDtype()
126-
string[python]
126+
<StringDtype(storage='python', na_value=<NA>)>
127127
128128
>>> pd.StringDtype(storage="pyarrow")
129-
string[pyarrow]
129+
<StringDtype(na_value=<NA>)>
130130
"""
131131

132132
@property
@@ -198,11 +198,8 @@ def __init__(
198198
self._na_value = na_value
199199

200200
def __repr__(self) -> str:
201-
if self._na_value is libmissing.NA:
202-
return f"{self.name}[{self.storage}]"
203-
else:
204-
# TODO add more informative repr
205-
return self.name
201+
storage = "" if self.storage == "pyarrow" else "storage='python', "
202+
return f"<StringDtype({storage}na_value={self._na_value})>"
206203

207204
def __eq__(self, other: object) -> bool:
208205
# we need to override the base class __eq__ because na_value (NA or NaN)

pandas/core/generic.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -3964,7 +3964,7 @@ def take(self, indices, axis: Axis = 0, **kwargs) -> Self:
39643964
----------
39653965
indices : array-like
39663966
An array of ints indicating which positions to take.
3967-
axis : {0 or 'index', 1 or 'columns', None}, default 0
3967+
axis : {0 or 'index', 1 or 'columns'}, default 0
39683968
The axis on which to select elements. ``0`` means that we are
39693969
selecting rows, ``1`` means that we are selecting columns.
39703970
For `Series` this parameter is unused and defaults to 0.
@@ -6819,12 +6819,12 @@ def convert_dtypes(
68196819
2 3 z <NA> <NA> 20 200.0
68206820
68216821
>>> dfn.dtypes
6822-
a Int32
6823-
b string[python]
6824-
c boolean
6825-
d string[python]
6826-
e Int64
6827-
f Float64
6822+
a Int32
6823+
b string
6824+
c boolean
6825+
d string
6826+
e Int64
6827+
f Float64
68286828
dtype: object
68296829
68306830
Start with a Series of strings and missing data represented by ``np.nan``.

pandas/io/formats/format.py

-3
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,6 @@
6767
ExtensionArray,
6868
TimedeltaArray,
6969
)
70-
from pandas.core.arrays.string_ import StringDtype
7170
from pandas.core.base import PandasObject
7271
import pandas.core.common as com
7372
from pandas.core.indexes.api import (
@@ -1218,8 +1217,6 @@ def _format(x):
12181217
return self.na_rep
12191218
elif isinstance(x, PandasObject):
12201219
return str(x)
1221-
elif isinstance(x, StringDtype):
1222-
return repr(x)
12231220
else:
12241221
# object dtype
12251222
return str(formatter(x))

pandas/tests/arrays/string_/test_string.py

+12
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,18 @@ def test_repr(dtype):
103103
assert repr(df.A.array) == expected
104104

105105

106+
def test_dtype_repr(dtype):
107+
if dtype.storage == "pyarrow":
108+
if dtype.na_value is pd.NA:
109+
assert repr(dtype) == "<StringDtype(na_value=<NA>)>"
110+
else:
111+
assert repr(dtype) == "<StringDtype(na_value=nan)>"
112+
elif dtype.na_value is pd.NA:
113+
assert repr(dtype) == "<StringDtype(storage='python', na_value=<NA>)>"
114+
else:
115+
assert repr(dtype) == "<StringDtype(storage='python', na_value=nan)>"
116+
117+
106118
def test_none_to_nan(cls, dtype):
107119
a = cls._from_sequence(["a", None, "b"], dtype=dtype)
108120
assert a[1] is not None

pandas/tests/generic/test_to_xarray.py

+28-24
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,13 @@
66
DataFrame,
77
MultiIndex,
88
Series,
9+
StringDtype,
910
date_range,
1011
)
1112
import pandas._testing as tm
13+
from pandas.util.version import Version
1214

13-
pytest.importorskip("xarray")
15+
xarray = pytest.importorskip("xarray")
1416

1517

1618
class TestDataFrameToXArray:
@@ -29,13 +31,17 @@ def df(self):
2931
}
3032
)
3133

32-
def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
34+
def test_to_xarray_index_types(self, index_flat, df, request):
3335
index = index_flat
3436
# MultiIndex is tested in test_to_xarray_with_multiindex
3537
if len(index) == 0:
3638
pytest.skip("Test doesn't make sense for empty index")
37-
38-
from xarray import Dataset
39+
elif Version(xarray.__version__) <= Version("2024.9.0"):
40+
request.applymarker(
41+
pytest.mark.xfail(
42+
reason="Categorical column not preserved.",
43+
)
44+
)
3945

4046
df.index = index[:4]
4147
df.index.name = "foo"
@@ -45,29 +51,22 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
4551
assert len(result.coords) == 1
4652
assert len(result.data_vars) == 8
4753
tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
48-
assert isinstance(result, Dataset)
54+
assert isinstance(result, xarray.Dataset)
4955

5056
# idempotency
5157
# datetimes w/tz are preserved
5258
# column names are lost
5359
expected = df.copy()
54-
expected["f"] = expected["f"].astype(
55-
object if not using_infer_string else "str"
56-
)
5760
expected.columns.name = None
5861
tm.assert_frame_equal(result.to_dataframe(), expected)
5962

6063
def test_to_xarray_empty(self, df):
61-
from xarray import Dataset
62-
6364
df.index.name = "foo"
6465
result = df[0:0].to_xarray()
6566
assert result.sizes["foo"] == 0
66-
assert isinstance(result, Dataset)
67+
assert isinstance(result, xarray.Dataset)
6768

6869
def test_to_xarray_with_multiindex(self, df, using_infer_string):
69-
from xarray import Dataset
70-
7170
# MultiIndex
7271
df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"])
7372
result = df.to_xarray()
@@ -76,7 +75,7 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
7675
assert len(result.coords) == 2
7776
assert len(result.data_vars) == 8
7877
tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
79-
assert isinstance(result, Dataset)
78+
assert isinstance(result, xarray.Dataset)
8079

8180
result = result.to_dataframe()
8281
expected = df.copy()
@@ -88,43 +87,48 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
8887

8988

9089
class TestSeriesToXArray:
91-
def test_to_xarray_index_types(self, index_flat):
90+
def test_to_xarray_index_types(self, index_flat, request):
9291
index = index_flat
92+
if (
93+
isinstance(index.dtype, StringDtype)
94+
and index.dtype.storage == "pyarrow"
95+
and Version(xarray.__version__) > Version("2024.9.0")
96+
):
97+
request.applymarker(
98+
pytest.mark.xfail(
99+
reason="xarray calling reshape of ArrowExtensionArray",
100+
raises=NotImplementedError,
101+
)
102+
)
93103
# MultiIndex is tested in test_to_xarray_with_multiindex
94104

95-
from xarray import DataArray
96-
97105
ser = Series(range(len(index)), index=index, dtype="int64")
98106
ser.index.name = "foo"
99107
result = ser.to_xarray()
100108
repr(result)
101109
assert len(result) == len(index)
102110
assert len(result.coords) == 1
103111
tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
104-
assert isinstance(result, DataArray)
112+
assert isinstance(result, xarray.DataArray)
105113

106114
# idempotency
107115
tm.assert_series_equal(result.to_series(), ser)
108116

109117
def test_to_xarray_empty(self):
110-
from xarray import DataArray
111-
112118
ser = Series([], dtype=object)
113119
ser.index.name = "foo"
114120
result = ser.to_xarray()
115121
assert len(result) == 0
116122
assert len(result.coords) == 1
117123
tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
118-
assert isinstance(result, DataArray)
124+
assert isinstance(result, xarray.DataArray)
119125

120126
def test_to_xarray_with_multiindex(self):
121-
from xarray import DataArray
122-
123127
mi = MultiIndex.from_product([["a", "b"], range(3)], names=["one", "two"])
124128
ser = Series(range(6), dtype="int64", index=mi)
125129
result = ser.to_xarray()
126130
assert len(result) == 2
127131
tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
128-
assert isinstance(result, DataArray)
132+
assert isinstance(result, xarray.DataArray)
129133
res = result.to_series()
130134
tm.assert_series_equal(res, ser)

pandas/tests/io/formats/test_to_string.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -777,9 +777,9 @@ def test_to_string_string_dtype(self):
777777
result = df.dtypes.to_string()
778778
expected = dedent(
779779
"""\
780-
x string[pyarrow]
781-
y string[python]
782-
z int64[pyarrow]"""
780+
x string
781+
y string
782+
z int64[pyarrow]"""
783783
)
784784
assert result == expected
785785

0 commit comments

Comments
 (0)