Merge remote-tracking branch 'upstream/main' into tst/dataframe-to_hdf-datetime64

KevsterAmp · KevsterAmp · commit 306a54ab112e · 2024-11-26T19:14:48.000+08:00
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
@@ -152,7 +152,7 @@ jobs:
         run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.21.3
+        uses: pypa/cibuildwheel@v2.22.0
         with:
          package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env:
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -109,18 +109,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.resample.Resampler.std SA01" \
         -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
         -i "pandas.core.resample.Resampler.var SA01" \
-        -i "pandas.errors.ChainedAssignmentError SA01" \
-        -i "pandas.errors.DuplicateLabelError SA01" \
         -i "pandas.errors.IntCastingNaNError SA01" \
-        -i "pandas.errors.InvalidIndexError SA01" \
         -i "pandas.errors.NullFrequencyError SA01" \
-        -i "pandas.errors.NumExprClobberingError SA01" \
         -i "pandas.errors.NumbaUtilError SA01" \
-        -i "pandas.errors.OutOfBoundsTimedelta SA01" \
         -i "pandas.errors.PerformanceWarning SA01" \
-        -i "pandas.errors.PossibleDataLossError SA01" \
         -i "pandas.errors.UndefinedVariableError PR01,SA01" \
-        -i "pandas.errors.UnsortedIndexError SA01" \
         -i "pandas.errors.ValueLabelTypeMismatch SA01" \
         -i "pandas.infer_freq SA01" \
         -i "pandas.io.json.build_table_schema PR07,RT03,SA01" \
diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst
@@ -567,9 +567,9 @@ One must have :math:`0 < \alpha \leq 1`, and while it is possible to pass
 
    \alpha =
     \begin{cases}
-        \frac{2}{s + 1},               & \text{for span}\ s \geq 1\\
-        \frac{1}{1 + c},               & \text{for center of mass}\ c \geq 0\\
-        1 - \exp^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0
+        \frac{2}{s + 1},            & \text{for span}\ s \geq 1\\
+        \frac{1}{1 + c},            & \text{for center of mass}\ c \geq 0\\
+        1 - e^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0
     \end{cases}
 
 One must specify precisely one of **span**, **center of mass**, **half-life**
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -690,6 +690,7 @@ I/O
 - Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`)
 - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
 - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
+- Bug in :meth:`DataFrame.to_excel` where a :class:`MultiIndex` index level containing periods was not written as a date (:issue:`60099`)
 - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`)
 - Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`)
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
@@ -764,7 +765,7 @@ ExtensionArray
 
 Styler
 ^^^^^^
--
+- Bug in :meth:`Styler.to_latex` when styling column headers in combination with a hidden index or hidden index levels (:issue:`52218`)
 
 Other
 ^^^^^
@@ -788,6 +789,7 @@ Other
 - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
 - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
 - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
+- Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`)
 - Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
 - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
 - Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
@@ -201,6 +201,10 @@ class OutOfBoundsTimedelta(ValueError):
 
     Representation should be within a timedelta64[ns].
 
+    See Also
+    --------
+    date_range : Return a fixed frequency DatetimeIndex.
+
     Examples
     --------
     >>> pd.date_range(start="1/1/1700", freq="B", periods=100000)
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -1055,7 +1055,9 @@ def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
             from pandas import Index
 
             fill_value = Index(self._left, copy=False)._na_value
-            empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
+            empty = IntervalArray.from_breaks(
+                [fill_value] * (empty_len + 1), closed=self.closed
+            )
         else:
             empty = self._from_sequence([fill_value] * empty_len, dtype=self.dtype)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4742,7 +4742,8 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
         3  4   4    7   8  0
         4  5   2    6   7  3
 
-        For columns with spaces in their name, you can use backtick quoting.
+        For columns with spaces or other disallowed characters in their name, you can
+        use backtick quoting.
 
         >>> df.eval("B * `C&C`")
         0    100
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -567,7 +567,7 @@ def __arrow_c_stream__(self, requested_schema=None):
         Export the pandas Series as an Arrow C stream PyCapsule.
 
         This relies on pyarrow to convert the pandas Series to the Arrow
-        format (and follows the default behaviour of ``pyarrow.Array.from_pandas``
+        format (and follows the default behavior of ``pyarrow.Array.from_pandas``
         in its handling of the index, i.e. to ignore it).
         This conversion is not necessarily zero-copy.
 
@@ -2226,7 +2226,7 @@ def drop_duplicates(
         5     hippo
         Name: animal, dtype: object
 
-        With the 'keep' parameter, the selection behaviour of duplicated values
+        With the 'keep' parameter, the selection behavior of duplicated values
         can be changed. The value 'first' keeps the first occurrence for each
         set of duplicated entries. The default value of keep is 'first'.
 
@@ -3451,7 +3451,7 @@ def sort_values(
         4     5.0
         dtype: float64
 
-        Sort values ascending order (default behaviour)
+        Sort values in ascending order (default behavior)
 
         >>> s.sort_values(ascending=True)
         1     1.0
@@ -4098,7 +4098,7 @@ def swaplevel(
 
         In the following example, we will swap the levels of the indices.
         Here, we will swap the levels column-wise, but levels can be swapped row-wise
-        in a similar manner. Note that column-wise is the default behaviour.
+        in a similar manner. Note that column-wise is the default behavior.
         By not supplying any arguments for i and j, we swap the last and second to
         last indices.
 
diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
@@ -100,6 +100,11 @@ class UnsortedIndexError(KeyError):
 
     Subclass of `KeyError`.
 
+    See Also
+    --------
+    DataFrame.sort_index : Sort a DataFrame by its index.
+    DataFrame.set_index : Set the DataFrame index using existing columns.
+
     Examples
     --------
     >>> df = pd.DataFrame(
@@ -388,6 +393,19 @@ class DuplicateLabelError(ValueError):
     """
     Error raised when an operation would introduce duplicate labels.
 
+    This error is typically encountered when performing operations on objects
+    with `allows_duplicate_labels=False` and the operation would result in
+    duplicate labels in the index. Duplicate labels can lead to ambiguities
+    in indexing and reduce data integrity.
+
+    See Also
+    --------
+    Series.set_flags : Return a new ``Series`` object with updated flags.
+    DataFrame.set_flags : Return a new ``DataFrame`` object with updated flags.
+    Series.reindex : Conform ``Series`` object to new index with optional filling logic.
+    DataFrame.reindex : Conform ``DataFrame`` object to new index with optional filling
+        logic.
+
     Examples
     --------
     >>> s = pd.Series([0, 1, 2], index=["a", "b", "c"]).set_flags(
@@ -407,6 +425,16 @@ class InvalidIndexError(Exception):
     """
     Exception raised when attempting to use an invalid index key.
 
+    This exception is triggered when a user attempts to access or manipulate
+    data in a pandas DataFrame or Series using an index key that is not valid
+    for the given object. This may occur in cases such as using a malformed
+    slice, a mismatched key for a ``MultiIndex``, or attempting to access an index
+    element that does not exist.
+
+    See Also
+    --------
+    MultiIndex : A multi-level, or hierarchical, index object for pandas objects.
+
     Examples
     --------
     >>> idx = pd.MultiIndex.from_product([["x", "y"], [0, 1]])
@@ -487,6 +515,11 @@ class ChainedAssignmentError(Warning):
     For more information on Copy-on-Write,
     see :ref:`the user guide<copy_on_write>`.
 
+    See Also
+    --------
+    options.mode.copy_on_write : Global setting for enabling or disabling
+        Copy-on-Write behavior.
+
     Examples
     --------
     >>> pd.options.mode.copy_on_write = True
@@ -505,6 +538,11 @@ class NumExprClobberingError(NameError):
     to 'numexpr'. 'numexpr' is the default engine value for these methods if the
     numexpr package is installed.
 
+    See Also
+    --------
+    eval : Evaluate a Python expression as a string using various backends.
+    DataFrame.query : Query the columns of a DataFrame with a boolean expression.
+
     Examples
     --------
     >>> df = pd.DataFrame({"abs": [1, 1, 1]})
@@ -628,6 +666,15 @@ class PossibleDataLossError(Exception):
     """
     Exception raised when trying to open a HDFStore file when already opened.
 
+    This error is triggered when there is a potential risk of data loss due to
+    conflicting operations on an HDFStore file. It serves to prevent unintended
+    overwrites or data corruption by enforcing exclusive access to the file.
+
+    See Also
+    --------
+    HDFStore : Dict-like IO interface for storing pandas objects in PyTables.
+    HDFStore.open : Open an HDFStore file in the specified mode.
+
     Examples
     --------
     >>> store = pd.HDFStore("my-store", "a")  # doctest: +SKIP
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
@@ -37,6 +37,7 @@
     DataFrame,
     Index,
     MultiIndex,
+    Period,
     PeriodIndex,
 )
 import pandas.core.common as com
@@ -803,6 +804,9 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
                         allow_fill=levels._can_hold_na,
                         fill_value=levels._na_value,
                     )
+                    # GH#60099
+                    if isinstance(values[0], Period):
+                        values = values.to_timestamp()
 
                     for i, span_val in spans.items():
                         mergestart, mergeend = None, None
@@ -827,6 +831,10 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
                 # Format hierarchical rows with non-merged values.
                 for indexcolvals in zip(*self.df.index):
                     for idx, indexcolval in enumerate(indexcolvals):
+                        # GH#60099
+                        if isinstance(indexcolval, Period):
+                            indexcolval = indexcolval.to_timestamp()
+
                         yield CssExcelCell(
                             row=self.rowcounter + idx,
                             col=gcolidx,
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1749,7 +1749,7 @@ def _trim_zeros_complex(str_complexes: ArrayLike, decimal: str = ".") -> list[st
         # The split will give [{"", "-"}, "xxx", "+/-", "xxx", "j", ""]
         # Therefore, the imaginary part is the 4th and 3rd last elements,
         # and the real part is everything before the imaginary part
-        trimmed = re.split(r"([j+-])", x)
+        trimmed = re.split(r"(?<!e)([j+-])", x)
         real_part.append("".join(trimmed[:-4]))
         imag_part.append("".join(trimmed[-4:-2]))
 
diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py
@@ -868,7 +868,8 @@ def _translate_latex(self, d: dict, clines: str | None) -> None:
             or multirow sparsification (so that \multirow and \multicol work correctly).
         """
         index_levels = self.index.nlevels
-        visible_index_level_n = index_levels - sum(self.hide_index_)
+        # GH 52218
+        visible_index_level_n = max(1, index_levels - sum(self.hide_index_))
         d["head"] = [
             [
                 {**col, "cellstyle": self.ctx_columns[r, c - visible_index_level_n]}
diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py
@@ -757,3 +757,12 @@ def test_shift_with_offsets_freq_empty(self):
         df_shifted = DataFrame(index=shifted_dates)
         result = df.shift(freq=offset)
         tm.assert_frame_equal(result, df_shifted)
+
+    def test_series_shift_interval_preserves_closed(self):
+        # GH#60389
+        ser = Series(
+            [pd.Interval(1, 2, closed="right"), pd.Interval(2, 3, closed="right")]
+        )
+        result = ser.shift(1)
+        expected = Series([np.nan, pd.Interval(1, 2, closed="right")])
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py
@@ -9,6 +9,9 @@
 
 from pandas import (
     DataFrame,
+    MultiIndex,
+    Timestamp,
+    period_range,
     read_excel,
 )
 import pandas._testing as tm
@@ -333,3 +336,26 @@ def test_styler_to_s3(s3_public_bucket, s3so):
             f"s3://{mock_bucket_name}/{target_file}", index_col=0, storage_options=s3so
         )
         tm.assert_frame_equal(result, df)
+
+
+@pytest.mark.parametrize("merge_cells", [True, False, "columns"])
+def test_format_hierarchical_rows_periodindex(merge_cells):
+    # GH#60099
+    df = DataFrame(
+        {"A": [1, 2]},
+        index=MultiIndex.from_arrays(
+            [
+                period_range(start="2006-10-06", end="2006-10-07", freq="D"),
+                ["X", "Y"],
+            ],
+            names=["date", "category"],
+        ),
+    )
+    formatter = ExcelFormatter(df, merge_cells=merge_cells)
+    formatted_cells = formatter._format_hierarchical_rows()
+
+    for cell in formatted_cells:
+        if cell.row != 0 and cell.col == 0:
+            assert isinstance(
+                cell.val, Timestamp
+            ), "Period should be converted to Timestamp"
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
@@ -23,6 +23,7 @@
     MultiIndex,
     date_range,
     option_context,
+    period_range,
 )
 import pandas._testing as tm
 
@@ -335,6 +336,43 @@ def test_multiindex_interval_datetimes(self, tmp_excel):
         )
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize("merge_cells", [True, False, "columns"])
+    def test_excel_round_trip_with_periodindex(self, tmp_excel, merge_cells):
+        # GH#60099
+        df = DataFrame(
+            {"A": [1, 2]},
+            index=MultiIndex.from_arrays(
+                [
+                    period_range(start="2006-10-06", end="2006-10-07", freq="D"),
+                    ["X", "Y"],
+                ],
+                names=["date", "category"],
+            ),
+        )
+        df.to_excel(tmp_excel, merge_cells=merge_cells)
+        result = pd.read_excel(tmp_excel, index_col=[0, 1])
+        expected = DataFrame(
+            {"A": [1, 2]},
+            MultiIndex.from_arrays(
+                [
+                    [
+                        pd.to_datetime("2006-10-06 00:00:00"),
+                        pd.to_datetime("2006-10-07 00:00:00"),
+                    ],
+                    ["X", "Y"],
+                ],
+                names=["date", "category"],
+            ),
+        )
+        time_format = (
+            "datetime64[s]" if tmp_excel.endswith(".ods") else "datetime64[us]"
+        )
+        expected.index = expected.index.set_levels(
+            expected.index.levels[0].astype(time_format), level=0
+        )
+
+        tm.assert_frame_equal(result, expected)
+
 
 @pytest.mark.parametrize(
     "engine,ext",
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
diff --git a/web/pandas/_templates/layout.html b/web/pandas/_templates/layout.html
diff --git a/web/pandas/index.html b/web/pandas/index.html