pandas-dev
diff --git a/Dockerfile
Lines changed: 12 additions & 8 deletions b/Dockerfile
Lines changed: 12 additions & 8 deletions
diff --git a/ci/deps/actions-310-minimum_versions.yaml
Lines changed: 0 additions & 1 deletion b/ci/deps/actions-310-minimum_versions.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/ci/deps/actions-310.yaml
Lines changed: 1 addition & 2 deletions b/ci/deps/actions-310.yaml
Lines changed: 1 addition & 2 deletions
diff --git a/ci/deps/actions-311-downstream_compat.yaml
Lines changed: 1 addition & 4 deletions b/ci/deps/actions-311-downstream_compat.yaml
Lines changed: 1 addition & 4 deletions
diff --git a/ci/deps/actions-311.yaml
Lines changed: 1 addition & 2 deletions b/ci/deps/actions-311.yaml
Lines changed: 1 addition & 2 deletions
diff --git a/ci/deps/actions-312.yaml
Lines changed: 1 addition & 2 deletions b/ci/deps/actions-312.yaml
Lines changed: 1 addition & 2 deletions
diff --git a/ci/deps/actions-313.yaml
Lines changed: 1 addition & 1 deletion b/ci/deps/actions-313.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
63.4 KB b/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
63.4 KB
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pptx
187 KB b/doc/cheatsheet/Pandas_Cheat_Sheet.pptx
187 KB
diff --git a/doc/cheatsheet/README.md
Lines changed: 1 addition & 1 deletion b/doc/cheatsheet/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/getting_started/install.rst
Lines changed: 0 additions & 1 deletion b/doc/source/getting_started/install.rst
Lines changed: 0 additions & 1 deletion
diff --git a/doc/source/getting_started/intro_tutorials/includes/titanic.rst
Lines changed: 1 addition & 1 deletion b/doc/source/getting_started/intro_tutorials/includes/titanic.rst
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/user_guide/io.rst
Lines changed: 1 addition & 1 deletion b/doc/source/user_guide/io.rst
Lines changed: 1 addition & 1 deletion
diff --git a/environment.yml
Lines changed: 1 addition & 4 deletions b/environment.yml
Lines changed: 1 addition & 4 deletions
diff --git a/pandas/compat/_optional.py
Lines changed: 0 additions & 1 deletion b/pandas/compat/_optional.py
Lines changed: 0 additions & 1 deletion
diff --git a/pandas/core/arrays/string_.py
Lines changed: 4 additions & 7 deletions b/pandas/core/arrays/string_.py
Lines changed: 4 additions & 7 deletions
diff --git a/pandas/core/generic.py
Lines changed: 7 additions & 7 deletions b/pandas/core/generic.py
Lines changed: 7 additions & 7 deletions
diff --git a/pandas/io/formats/format.py
Lines changed: 0 additions & 3 deletions b/pandas/io/formats/format.py
Lines changed: 0 additions & 3 deletions
diff --git a/pandas/tests/arrays/string_/test_string.py
Lines changed: 12 additions & 0 deletions b/pandas/tests/arrays/string_/test_string.py
Lines changed: 12 additions & 0 deletions
@@ -1,16 +1,20 @@
 FROM python:3.10.8
 WORKDIR /home/pandas
 
-RUN apt-get update && apt-get -y upgrade
-RUN apt-get install -y build-essential bash-completion
+RUN apt-get update && \
+    apt-get --no-install-recommends -y upgrade && \
+    apt-get --no-install-recommends -y install \
+    build-essential \
+    bash-completion \
+    # hdf5 needed for pytables installation
+    libhdf5-dev \
+    # libgles2-mesa needed for pytest-qt
+    libgles2-mesa-dev && \
+    rm -rf /var/lib/apt/lists/*
 
-# hdf5 needed for pytables installation
-# libgles2-mesa needed for pytest-qt
-RUN apt-get install -y libhdf5-dev libgles2-mesa-dev
-
-RUN python -m pip install --upgrade pip
 COPY requirements-dev.txt /tmp
-RUN python -m pip install -r /tmp/requirements-dev.txt
+RUN python -m pip install --no-cache-dir --upgrade pip && \
+    python -m pip install --no-cache-dir -r /tmp/requirements-dev.txt
 RUN git config --global --add safe.directory /home/pandas
 
 ENV SHELL="/bin/bash"
 
@@ -26,7 +26,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4=4.12.3
-  - blosc=1.21.3
   - bottleneck=1.3.6
   - fastparquet=2024.2.0
   - fsspec=2024.2.0
 
@@ -24,7 +24,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4>=4.12.3
-  - blosc>=1.21.3
   - bottleneck>=1.3.6
   - fastparquet>=2024.2.0
   - fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
 
@@ -25,7 +25,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4>=4.12.3
-  - blosc>=1.21.3
   - bottleneck>=1.3.6
   - fastparquet>=2024.2.0
   - fsspec>=2024.2.0
@@ -53,7 +52,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
@@ -63,14 +62,12 @@ dependencies:
   - cftime
   - dask
   - ipython
-  - geopandas-base
   - seaborn
   - scikit-learn
   - statsmodels
   - coverage
   - pandas-datareader
   - pyyaml
-  - py
   - pip:
     - adbc-driver-postgresql>=0.10.0
     - adbc-driver-sqlite>=0.8.0
 
@@ -24,7 +24,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4>=4.12.3
-  - blosc>=1.21.3
   - bottleneck>=1.3.6
   - fastparquet>=2024.2.0
   - fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
 
@@ -24,7 +24,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4>=4.12.3
-  - blosc>=1.21.3
   - bottleneck>=1.3.6
   - fastparquet>=2024.2.0
   - fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
 
@@ -52,7 +52,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
 
@@ -12,7 +12,7 @@ This cheat sheet, originally written by Irv Lustig, [Princeton Consultants](http
 | Pandas_Cheat_Sheet_JA  | Japanese    | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx" target="_parent"><img  src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
 | Pandas_Cheat_Sheet_FA  | Persian     | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_FA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_FA.pptx" target="_parent"><img  src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
 
-
+The English version has additional material that is not in the versions in other languages.
 
 **Alternative**
 
 
@@ -305,7 +305,6 @@ Installable with ``pip install "pandas[hdf5, parquet, feather, spss, excel]"``
 Dependency                                             Minimum Version    pip extra        Notes
 ====================================================== ================== ================ ==========================================================
 `PyTables <https://github.com/PyTables/PyTables>`__    3.8.0              hdf5             HDF5-based reading / writing
-`blosc <https://github.com/Blosc/c-blosc>`__           1.21.3             hdf5             Compression for HDF5; only available on ``conda``
 `zlib <https://github.com/madler/zlib>`__                                 hdf5             Compression for HDF5
 `fastparquet <https://github.com/dask/fastparquet>`__  2024.2.0           -                Parquet reading / writing (pyarrow is default)
 `pyarrow <https://github.com/apache/arrow>`__          10.0.1             parquet, feather Parquet, ORC, and feather reading / writing
 
@@ -11,7 +11,7 @@ This tutorial uses the Titanic data set, stored as CSV. The data
 consists of the following data columns:
 
 -  PassengerId: Id of every passenger.
--  Survived: Indication whether passenger survived. ``0`` for yes and ``1`` for no.
+-  Survived: Indication whether passenger survived. ``0`` for no and ``1`` for yes.
 -  Pclass: One out of the 3 ticket classes: Class ``1``, Class ``2`` and Class ``3``.
 -  Name: Name of passenger.
 -  Sex: Gender of passenger.
 
@@ -26,7 +26,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
     text, Local clipboard, :ref:`read_clipboard<io.clipboard>`, :ref:`to_clipboard<io.clipboard>`
     binary,`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__ , :ref:`read_excel<io.excel_reader>`, :ref:`to_excel<io.excel_writer>`
     binary,`OpenDocument <http://opendocumentformat.org>`__, :ref:`read_excel<io.ods>`, NA
-    binary,`HDF5 Format <https://support.hdfgroup.org/HDF5/whatishdf5.html>`__, :ref:`read_hdf<io.hdf5>`, :ref:`to_hdf<io.hdf5>`
+    binary,`HDF5 Format <https://support.hdfgroup.org/documentation/hdf5/latest/_intro_h_d_f5.html>`__, :ref:`read_hdf<io.hdf5>`, :ref:`to_hdf<io.hdf5>`
     binary,`Feather Format <https://github.com/wesm/feather>`__, :ref:`read_feather<io.feather>`, :ref:`to_feather<io.feather>`
     binary,`Parquet Format <https://parquet.apache.org/>`__, :ref:`read_parquet<io.parquet>`, :ref:`to_parquet<io.parquet>`
     binary,`ORC Format <https://orc.apache.org/>`__, :ref:`read_orc<io.orc>`, :ref:`to_orc<io.orc>`
 
@@ -27,7 +27,6 @@ dependencies:
 
   # optional dependencies
   - beautifulsoup4>=4.12.3
-  - blosc
   - bottleneck>=1.3.6
   - fastparquet>=2024.2.0
   - fsspec>=2024.2.0
@@ -55,7 +54,7 @@ dependencies:
   - scipy>=1.12.0
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
-  - xarray>=2024.1.1, <=2024.9.0
+  - xarray>=2024.1.1
   - xlrd>=2.0.1
   - xlsxwriter>=3.2.0
   - zstandard>=0.22.0
@@ -83,8 +82,6 @@ dependencies:
 
   # documentation
   - gitpython  # obtain contributors from git for whatsnew
-  - gitdb
-  - google-auth
   - natsort  # DataFrame.sort_values doctest
   - numpydoc
   - pydata-sphinx-theme=0.16
 
@@ -23,7 +23,6 @@
     "adbc-driver-postgresql": "0.10.0",
     "adbc-driver-sqlite": "0.8.0",
     "bs4": "4.12.3",
-    "blosc": "1.21.3",
     "bottleneck": "1.3.6",
     "fastparquet": "2024.2.0",
     "fsspec": "2024.2.0",
 
@@ -123,10 +123,10 @@ class StringDtype(StorageExtensionDtype):
     Examples
     --------
     >>> pd.StringDtype()
-    string[python]
+    <StringDtype(storage='python', na_value=<NA>)>
 
     >>> pd.StringDtype(storage="pyarrow")
-    string[pyarrow]
+    <StringDtype(na_value=<NA>)>
     """
 
     @property
@@ -198,11 +198,8 @@ def __init__(
         self._na_value = na_value
 
     def __repr__(self) -> str:
-        if self._na_value is libmissing.NA:
-            return f"{self.name}[{self.storage}]"
-        else:
-            # TODO add more informative repr
-            return self.name
+        storage = "" if self.storage == "pyarrow" else "storage='python', "
+        return f"<StringDtype({storage}na_value={self._na_value})>"
 
     def __eq__(self, other: object) -> bool:
         # we need to override the base class __eq__ because na_value (NA or NaN)
 
@@ -3964,7 +3964,7 @@ def take(self, indices, axis: Axis = 0, **kwargs) -> Self:
         ----------
         indices : array-like
             An array of ints indicating which positions to take.
-        axis : {0 or 'index', 1 or 'columns', None}, default 0
+        axis : {0 or 'index', 1 or 'columns'}, default 0
             The axis on which to select elements. ``0`` means that we are
             selecting rows, ``1`` means that we are selecting columns.
             For `Series` this parameter is unused and defaults to 0.
@@ -6819,12 +6819,12 @@ def convert_dtypes(
         2  3  z   <NA>  <NA>    20  200.0
 
         >>> dfn.dtypes
-        a             Int32
-        b    string[python]
-        c           boolean
-        d    string[python]
-        e             Int64
-        f           Float64
+        a      Int32
+        b     string
+        c    boolean
+        d     string
+        e      Int64
+        f    Float64
         dtype: object
 
         Start with a Series of strings and missing data represented by ``np.nan``.
 
@@ -67,7 +67,6 @@
     ExtensionArray,
     TimedeltaArray,
 )
-from pandas.core.arrays.string_ import StringDtype
 from pandas.core.base import PandasObject
 import pandas.core.common as com
 from pandas.core.indexes.api import (
@@ -1218,8 +1217,6 @@ def _format(x):
                 return self.na_rep
             elif isinstance(x, PandasObject):
                 return str(x)
-            elif isinstance(x, StringDtype):
-                return repr(x)
             else:
                 # object dtype
                 return str(formatter(x))
 
@@ -103,6 +103,18 @@ def test_repr(dtype):
     assert repr(df.A.array) == expected
 
 
+def test_dtype_repr(dtype):
+    if dtype.storage == "pyarrow":
+        if dtype.na_value is pd.NA:
+            assert repr(dtype) == "<StringDtype(na_value=<NA>)>"
+        else:
+            assert repr(dtype) == "<StringDtype(na_value=nan)>"
+    elif dtype.na_value is pd.NA:
+        assert repr(dtype) == "<StringDtype(storage='python', na_value=<NA>)>"
+    else:
+        assert repr(dtype) == "<StringDtype(storage='python', na_value=nan)>"
+
+
 def test_none_to_nan(cls, dtype):
     a = cls._from_sequence(["a", None, "b"], dtype=dtype)
     assert a[1] is not None