pandas-dev
diff --git a/ci/deps/actions-310-minimum_versions.yaml
-1 b/ci/deps/actions-310-minimum_versions.yaml
-1
diff --git a/ci/deps/actions-310.yaml
+1-2 b/ci/deps/actions-310.yaml
+1-2
diff --git a/ci/deps/actions-311-downstream_compat.yaml
+1-4 b/ci/deps/actions-311-downstream_compat.yaml
+1-4
diff --git a/ci/deps/actions-311.yaml
+1-2 b/ci/deps/actions-311.yaml
+1-2
diff --git a/ci/deps/actions-312.yaml
+1-2 b/ci/deps/actions-312.yaml
+1-2
diff --git a/ci/deps/actions-313.yaml
+1-1 b/ci/deps/actions-313.yaml
+1-1
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
63.4 KB b/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
63.4 KB
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pptx
187 KB b/doc/cheatsheet/Pandas_Cheat_Sheet.pptx
187 KB
diff --git a/doc/cheatsheet/README.md
+1-1 b/doc/cheatsheet/README.md
+1-1
diff --git a/doc/source/getting_started/install.rst
-1 b/doc/source/getting_started/install.rst
-1
diff --git a/doc/source/getting_started/intro_tutorials/includes/titanic.rst
+1-1 b/doc/source/getting_started/intro_tutorials/includes/titanic.rst
+1-1
diff --git a/environment.yml
+1-4 b/environment.yml
+1-4
diff --git a/pandas/compat/_optional.py
-1 b/pandas/compat/_optional.py
-1
diff --git a/pandas/core/arrays/string_.py
+4-7 b/pandas/core/arrays/string_.py
+4-7
diff --git a/pandas/core/generic.py
+7-7 b/pandas/core/generic.py
+7-7
diff --git a/pandas/io/formats/format.py
-3 b/pandas/io/formats/format.py
-3
diff --git a/pandas/tests/arrays/string_/test_string.py
+12 b/pandas/tests/arrays/string_/test_string.py
+12
diff --git a/pandas/tests/generic/test_to_xarray.py
+28-24 b/pandas/tests/generic/test_to_xarray.py
+28-24
diff --git a/pandas/tests/io/formats/test_to_string.py
+3-3 b/pandas/tests/io/formats/test_to_string.py
+3-3
@@ -26,7 +26,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4=4.12.3
-  - blosc=1.21.3
   - bottleneck=1.3.6
   - fastparquet=2024.2.0
   - fsspec=2024.2.0
 
@@ -24,7 +24,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4>=4.12.3
-  - blosc>=1.21.3
   - bottleneck>=1.3.6
   - fastparquet>=2024.2.0
   - fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
 
@@ -25,7 +25,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4>=4.12.3
-  - blosc>=1.21.3
   - bottleneck>=1.3.6
   - fastparquet>=2024.2.0
   - fsspec>=2024.2.0
@@ -53,7 +52,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
@@ -63,14 +62,12 @@ dependencies:
   - cftime
   - dask
   - ipython
-  - geopandas-base
   - seaborn
   - scikit-learn
   - statsmodels
   - coverage
   - pandas-datareader
   - pyyaml
-  - py
   - pip:
     - adbc-driver-postgresql>=0.10.0
     - adbc-driver-sqlite>=0.8.0
 
@@ -24,7 +24,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4>=4.12.3
-  - blosc>=1.21.3
   - bottleneck>=1.3.6
   - fastparquet>=2024.2.0
   - fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
 
@@ -24,7 +24,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4>=4.12.3
-  - blosc>=1.21.3
   - bottleneck>=1.3.6
   - fastparquet>=2024.2.0
   - fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
 
@@ -52,7 +52,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
 
@@ -12,7 +12,7 @@ This cheat sheet, originally written by Irv Lustig, [Princeton Consultants](http
 | Pandas_Cheat_Sheet_JA  | Japanese    | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx" target="_parent"><img  src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
 | Pandas_Cheat_Sheet_FA  | Persian     | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_FA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_FA.pptx" target="_parent"><img  src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
 
-
+The English version has additional material that is not in the versions in other languages.
 
 **Alternative**
 
 
@@ -305,7 +305,6 @@ Installable with ``pip install "pandas[hdf5, parquet, feather, spss, excel]"``
 Dependency                                             Minimum Version    pip extra        Notes
 ====================================================== ================== ================ ==========================================================
 `PyTables <https://github.com/PyTables/PyTables>`__    3.8.0              hdf5             HDF5-based reading / writing
-`blosc <https://github.com/Blosc/c-blosc>`__           1.21.3             hdf5             Compression for HDF5; only available on ``conda``
 `zlib <https://github.com/madler/zlib>`__                                 hdf5             Compression for HDF5
 `fastparquet <https://github.com/dask/fastparquet>`__  2024.2.0           -                Parquet reading / writing (pyarrow is default)
 `pyarrow <https://github.com/apache/arrow>`__          10.0.1             parquet, feather Parquet, ORC, and feather reading / writing
 
@@ -11,7 +11,7 @@ This tutorial uses the Titanic data set, stored as CSV. The data
 consists of the following data columns:
 
 -  PassengerId: Id of every passenger.
--  Survived: Indication whether passenger survived. ``0`` for yes and ``1`` for no.
+-  Survived: Indication whether passenger survived. ``0`` for no and ``1`` for yes.
 -  Pclass: One out of the 3 ticket classes: Class ``1``, Class ``2`` and Class ``3``.
 -  Name: Name of passenger.
 -  Sex: Gender of passenger.
 
@@ -27,7 +27,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4>=4.12.3
-  - blosc
   - bottleneck>=1.3.6
   - fastparquet>=2024.2.0
   - fsspec>=2024.2.0
@@ -55,7 +54,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
@@ -83,8 +82,6 @@ dependencies:
 
   # documentation
   - gitpython  # obtain contributors from git for whatsnew
-  - gitdb
-  - google-auth
   - natsort  # DataFrame.sort_values doctest
   - numpydoc
   - pydata-sphinx-theme=0.16
 
@@ -23,7 +23,6 @@
     "adbc-driver-postgresql": "0.10.0",
     "adbc-driver-sqlite": "0.8.0",
     "bs4": "4.12.3",
-    "blosc": "1.21.3",
     "bottleneck": "1.3.6",
     "fastparquet": "2024.2.0",
     "fsspec": "2024.2.0",
 
@@ -123,10 +123,10 @@ class StringDtype(StorageExtensionDtype):
     Examples
     --------
     >>> pd.StringDtype()
-    string[python]
+    <StringDtype(storage='python', na_value=<NA>)>
 
     >>> pd.StringDtype(storage="pyarrow")
-    string[pyarrow]
+    <StringDtype(na_value=<NA>)>
     """
 
     @property
@@ -198,11 +198,8 @@ def __init__(
         self._na_value = na_value
 
     def __repr__(self) -> str:
-        if self._na_value is libmissing.NA:
-            return f"{self.name}[{self.storage}]"
-        else:
-            # TODO add more informative repr
-            return self.name
+        storage_part = "storage='python', " if self.storage != "pyarrow" else ""
+        return f"<StringDtype({storage_part}na_value={self._na_value})>"
 
     def __eq__(self, other: object) -> bool:
         # we need to override the base class __eq__ because na_value (NA or NaN)
 
@@ -3964,7 +3964,7 @@ def take(self, indices, axis: Axis = 0, **kwargs) -> Self:
         ----------
         indices : array-like
             An array of ints indicating which positions to take.
-        axis : {0 or 'index', 1 or 'columns', None}, default 0
+        axis : {0 or 'index', 1 or 'columns'}, default 0
             The axis on which to select elements. ``0`` means that we are
             selecting rows, ``1`` means that we are selecting columns.
             For `Series` this parameter is unused and defaults to 0.
@@ -6819,12 +6819,12 @@ def convert_dtypes(
         2  3  z   <NA>  <NA>    20  200.0
 
         >>> dfn.dtypes
-        a             Int32
-        b    string[python]
-        c           boolean
-        d    string[python]
-        e             Int64
-        f           Float64
+        a      Int32
+        b     string
+        c    boolean
+        d     string
+        e      Int64
+        f    Float64
         dtype: object
 
         Start with a Series of strings and missing data represented by ``np.nan``.
 
@@ -67,7 +67,6 @@
     ExtensionArray,
     TimedeltaArray,
 )
-from pandas.core.arrays.string_ import StringDtype
 from pandas.core.base import PandasObject
 import pandas.core.common as com
 from pandas.core.indexes.api import (
@@ -1218,8 +1217,6 @@ def _format(x):
                 return self.na_rep
             elif isinstance(x, PandasObject):
                 return str(x)
-            elif isinstance(x, StringDtype):
-                return repr(x)
             else:
                 # object dtype
                 return str(formatter(x))
 
@@ -103,6 +103,18 @@ def test_repr(dtype):
     assert repr(df.A.array) == expected
 
 
+def test_dtype_repr(dtype):
+    # The repr names the storage only for dtypes that are not pyarrow-backed.
+    if dtype.storage == "pyarrow":
+        storage_part = ""
+    else:
+        storage_part = "storage='python', "
+    # pd.NA renders as <NA>; any other fixture na_value is expected as nan.
+    na_part = "<NA>" if dtype.na_value is pd.NA else "nan"
+    expected = f"<StringDtype({storage_part}na_value={na_part})>"
+    assert repr(dtype) == expected
+
+
 def test_none_to_nan(cls, dtype):
     a = cls._from_sequence(["a", None, "b"], dtype=dtype)
     assert a[1] is not None
 
@@ -6,11 +6,13 @@
     DataFrame,
     MultiIndex,
     Series,
+    StringDtype,
     date_range,
 )
 import pandas._testing as tm
+from pandas.util.version import Version
 
-pytest.importorskip("xarray")
+xarray = pytest.importorskip("xarray")
 
 
 class TestDataFrameToXArray:
@@ -29,13 +31,17 @@ def df(self):
             }
         )
 
-    def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
+    def test_to_xarray_index_types(self, index_flat, df, request):
         index = index_flat
         # MultiIndex is tested in test_to_xarray_with_multiindex
         if len(index) == 0:
             pytest.skip("Test doesn't make sense for empty index")
-
-        from xarray import Dataset
+        elif Version(xarray.__version__) <= Version("2024.9.0"):
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason="Categorical column not preserved.",
+                )
+            )
 
         df.index = index[:4]
         df.index.name = "foo"
@@ -45,29 +51,22 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
         assert len(result.coords) == 1
         assert len(result.data_vars) == 8
         tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
-        assert isinstance(result, Dataset)
+        assert isinstance(result, xarray.Dataset)
 
         # idempotency
         # datetimes w/tz are preserved
         # column names are lost
         expected = df.copy()
-        expected["f"] = expected["f"].astype(
-            object if not using_infer_string else "str"
-        )
         expected.columns.name = None
         tm.assert_frame_equal(result.to_dataframe(), expected)
 
     def test_to_xarray_empty(self, df):
-        from xarray import Dataset
-
         df.index.name = "foo"
         result = df[0:0].to_xarray()
         assert result.sizes["foo"] == 0
-        assert isinstance(result, Dataset)
+        assert isinstance(result, xarray.Dataset)
 
     def test_to_xarray_with_multiindex(self, df, using_infer_string):
-        from xarray import Dataset
-
         # MultiIndex
         df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"])
         result = df.to_xarray()
@@ -76,7 +75,7 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
         assert len(result.coords) == 2
         assert len(result.data_vars) == 8
         tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
-        assert isinstance(result, Dataset)
+        assert isinstance(result, xarray.Dataset)
 
         result = result.to_dataframe()
         expected = df.copy()
@@ -88,43 +87,48 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
 
 
 class TestSeriesToXArray:
-    def test_to_xarray_index_types(self, index_flat):
+    def test_to_xarray_index_types(self, index_flat, request):
         index = index_flat
+        if (
+            isinstance(index.dtype, StringDtype)
+            and index.dtype.storage == "pyarrow"
+            and Version(xarray.__version__) > Version("2024.9.0")
+        ):
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason="xarray calling reshape of ArrowExtensionArray",
+                    raises=NotImplementedError,
+                )
+            )
         # MultiIndex is tested in test_to_xarray_with_multiindex
 
-        from xarray import DataArray
-
         ser = Series(range(len(index)), index=index, dtype="int64")
         ser.index.name = "foo"
         result = ser.to_xarray()
         repr(result)
         assert len(result) == len(index)
         assert len(result.coords) == 1
         tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
-        assert isinstance(result, DataArray)
+        assert isinstance(result, xarray.DataArray)
 
         # idempotency
         tm.assert_series_equal(result.to_series(), ser)
 
     def test_to_xarray_empty(self):
-        from xarray import DataArray
-
         ser = Series([], dtype=object)
         ser.index.name = "foo"
         result = ser.to_xarray()
         assert len(result) == 0
         assert len(result.coords) == 1
         tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
-        assert isinstance(result, DataArray)
+        assert isinstance(result, xarray.DataArray)
 
     def test_to_xarray_with_multiindex(self):
-        from xarray import DataArray
-
         mi = MultiIndex.from_product([["a", "b"], range(3)], names=["one", "two"])
         ser = Series(range(6), dtype="int64", index=mi)
         result = ser.to_xarray()
         assert len(result) == 2
         tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
-        assert isinstance(result, DataArray)
+        assert isinstance(result, xarray.DataArray)
         res = result.to_series()
         tm.assert_series_equal(res, ser)
@@ -777,9 +777,9 @@ def test_to_string_string_dtype(self):
         result = df.dtypes.to_string()
         expected = dedent(
             """\
-            x    string[pyarrow]
-            y     string[python]
-            z     int64[pyarrow]"""
+            x            string
+            y            string
+            z    int64[pyarrow]"""
         )
         assert result == expected