Skip to content

Commit 306a54a

Browse files
committed
Merge remote-tracking branch 'upstream/main' into tst/dataframe-to_hdf-datetime64
2 parents 93fbe50 + b1c2ba7 commit 306a54a

File tree

21 files changed

+266
-28
lines changed

21 files changed

+266
-28
lines changed

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ jobs:
152152
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
153153

154154
- name: Build wheels
155-
uses: pypa/cibuildwheel@v2.21.3
155+
uses: pypa/cibuildwheel@v2.22.0
156156
with:
157157
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
158158
env:

ci/code_checks.sh

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -109,18 +109,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
109109
-i "pandas.core.resample.Resampler.std SA01" \
110110
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
111111
-i "pandas.core.resample.Resampler.var SA01" \
112-
-i "pandas.errors.ChainedAssignmentError SA01" \
113-
-i "pandas.errors.DuplicateLabelError SA01" \
114112
-i "pandas.errors.IntCastingNaNError SA01" \
115-
-i "pandas.errors.InvalidIndexError SA01" \
116113
-i "pandas.errors.NullFrequencyError SA01" \
117-
-i "pandas.errors.NumExprClobberingError SA01" \
118114
-i "pandas.errors.NumbaUtilError SA01" \
119-
-i "pandas.errors.OutOfBoundsTimedelta SA01" \
120115
-i "pandas.errors.PerformanceWarning SA01" \
121-
-i "pandas.errors.PossibleDataLossError SA01" \
122116
-i "pandas.errors.UndefinedVariableError PR01,SA01" \
123-
-i "pandas.errors.UnsortedIndexError SA01" \
124117
-i "pandas.errors.ValueLabelTypeMismatch SA01" \
125118
-i "pandas.infer_freq SA01" \
126119
-i "pandas.io.json.build_table_schema PR07,RT03,SA01" \

doc/source/user_guide/window.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -567,9 +567,9 @@ One must have :math:`0 < \alpha \leq 1`, and while it is possible to pass
567567
568568
\alpha =
569569
\begin{cases}
570-
\frac{2}{s + 1}, & \text{for span}\ s \geq 1\\
571-
\frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\
572-
1 - \exp^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0
570+
\frac{2}{s + 1}, & \text{for span}\ s \geq 1\\
571+
\frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\
572+
1 - e^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0
573573
\end{cases}
574574
575575
One must specify precisely one of **span**, **center of mass**, **half-life**

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,7 @@ I/O
690690
- Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`)
691691
- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
692692
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
693+
- Bug in :meth:`DataFrame.to_excel` where a :class:`MultiIndex` index with a :class:`Period` level was not written as a date (:issue:`60099`)
693694
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)
694695
- Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`)
695696
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
@@ -764,7 +765,7 @@ ExtensionArray
764765

765766
Styler
766767
^^^^^^
767-
-
768+
- Bug in :meth:`Styler.to_latex` when styling column headers in combination with a hidden index or hidden index-levels (:issue:`52218`)
768769

769770
Other
770771
^^^^^
@@ -788,6 +789,7 @@ Other
788789
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
789790
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
790791
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
792+
- Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`)
791793
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
792794
- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
793795
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)

pandas/_libs/tslibs/np_datetime.pyx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ class OutOfBoundsTimedelta(ValueError):
201201
202202
Representation should be within a timedelta64[ns].
203203
204+
See Also
205+
--------
206+
date_range : Return a fixed frequency DatetimeIndex.
207+
204208
Examples
205209
--------
206210
>>> pd.date_range(start="1/1/1700", freq="B", periods=100000)

pandas/core/arrays/interval.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1055,7 +1055,9 @@ def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
10551055
from pandas import Index
10561056

10571057
fill_value = Index(self._left, copy=False)._na_value
1058-
empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
1058+
empty = IntervalArray.from_breaks(
1059+
[fill_value] * (empty_len + 1), closed=self.closed
1060+
)
10591061
else:
10601062
empty = self._from_sequence([fill_value] * empty_len, dtype=self.dtype)
10611063

pandas/core/frame.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4742,7 +4742,8 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
47424742
3 4 4 7 8 0
47434743
4 5 2 6 7 3
47444744
4745-
For columns with spaces in their name, you can use backtick quoting.
4745+
For columns with spaces or other disallowed characters in their name, you can
4746+
use backtick quoting.
47464747
47474748
>>> df.eval("B * `C&C`")
47484749
0 100

pandas/core/series.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ def __arrow_c_stream__(self, requested_schema=None):
567567
Export the pandas Series as an Arrow C stream PyCapsule.
568568
569569
This relies on pyarrow to convert the pandas Series to the Arrow
570-
format (and follows the default behaviour of ``pyarrow.Array.from_pandas``
570+
format (and follows the default behavior of ``pyarrow.Array.from_pandas``
571571
in its handling of the index, i.e. to ignore it).
572572
This conversion is not necessarily zero-copy.
573573
@@ -2226,7 +2226,7 @@ def drop_duplicates(
22262226
5 hippo
22272227
Name: animal, dtype: object
22282228
2229-
With the 'keep' parameter, the selection behaviour of duplicated values
2229+
With the 'keep' parameter, the selection behavior of duplicated values
22302230
can be changed. The value 'first' keeps the first occurrence for each
22312231
set of duplicated entries. The default value of keep is 'first'.
22322232
@@ -3451,7 +3451,7 @@ def sort_values(
34513451
4 5.0
34523452
dtype: float64
34533453
3454-
Sort values ascending order (default behaviour)
3454+
Sort values in ascending order (default behavior)
34553455
34563456
>>> s.sort_values(ascending=True)
34573457
1 1.0
@@ -4098,7 +4098,7 @@ def swaplevel(
40984098
40994099
In the following example, we will swap the levels of the indices.
41004100
Here, we will swap the levels column-wise, but levels can be swapped row-wise
4101-
in a similar manner. Note that column-wise is the default behaviour.
4101+
in a similar manner. Note that column-wise is the default behavior.
41024102
By not supplying any arguments for i and j, we swap the last and second to
41034103
last indices.
41044104

pandas/errors/__init__.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ class UnsortedIndexError(KeyError):
100100
101101
Subclass of `KeyError`.
102102
103+
See Also
104+
--------
105+
DataFrame.sort_index : Sort a DataFrame by its index.
106+
DataFrame.set_index : Set the DataFrame index using existing columns.
107+
103108
Examples
104109
--------
105110
>>> df = pd.DataFrame(
@@ -388,6 +393,19 @@ class DuplicateLabelError(ValueError):
388393
"""
389394
Error raised when an operation would introduce duplicate labels.
390395
396+
This error is typically encountered when performing operations on objects
397+
with `allows_duplicate_labels=False` and the operation would result in
398+
duplicate labels in the index. Duplicate labels can lead to ambiguities
399+
in indexing and reduce data integrity.
400+
401+
See Also
402+
--------
403+
Series.set_flags : Return a new ``Series`` object with updated flags.
404+
DataFrame.set_flags : Return a new ``DataFrame`` object with updated flags.
405+
Series.reindex : Conform ``Series`` object to new index with optional filling logic.
406+
DataFrame.reindex : Conform ``DataFrame`` object to new index with optional filling
407+
logic.
408+
391409
Examples
392410
--------
393411
>>> s = pd.Series([0, 1, 2], index=["a", "b", "c"]).set_flags(
@@ -407,6 +425,16 @@ class InvalidIndexError(Exception):
407425
"""
408426
Exception raised when attempting to use an invalid index key.
409427
428+
This exception is triggered when a user attempts to access or manipulate
429+
data in a pandas DataFrame or Series using an index key that is not valid
430+
for the given object. This may occur in cases such as using a malformed
431+
slice, a mismatched key for a ``MultiIndex``, or attempting to access an index
432+
element that does not exist.
433+
434+
See Also
435+
--------
436+
MultiIndex : A multi-level, or hierarchical, index object for pandas objects.
437+
410438
Examples
411439
--------
412440
>>> idx = pd.MultiIndex.from_product([["x", "y"], [0, 1]])
@@ -487,6 +515,11 @@ class ChainedAssignmentError(Warning):
487515
For more information on Copy-on-Write,
488516
see :ref:`the user guide<copy_on_write>`.
489517
518+
See Also
519+
--------
520+
options.mode.copy_on_write : Global setting for enabling or disabling
521+
Copy-on-Write behavior.
522+
490523
Examples
491524
--------
492525
>>> pd.options.mode.copy_on_write = True
@@ -505,6 +538,11 @@ class NumExprClobberingError(NameError):
505538
to 'numexpr'. 'numexpr' is the default engine value for these methods if the
506539
numexpr package is installed.
507540
541+
See Also
542+
--------
543+
eval : Evaluate a Python expression as a string using various backends.
544+
DataFrame.query : Query the columns of a DataFrame with a boolean expression.
545+
508546
Examples
509547
--------
510548
>>> df = pd.DataFrame({"abs": [1, 1, 1]})
@@ -628,6 +666,15 @@ class PossibleDataLossError(Exception):
628666
"""
629667
Exception raised when trying to open an HDFStore file that is already open.
630668
669+
This error is triggered when there is a potential risk of data loss due to
670+
conflicting operations on an HDFStore file. It serves to prevent unintended
671+
overwrites or data corruption by enforcing exclusive access to the file.
672+
673+
See Also
674+
--------
675+
HDFStore : Dict-like IO interface for storing pandas objects in PyTables.
676+
HDFStore.open : Open an HDFStore file in the specified mode.
677+
631678
Examples
632679
--------
633680
>>> store = pd.HDFStore("my-store", "a") # doctest: +SKIP

pandas/io/formats/excel.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
DataFrame,
3838
Index,
3939
MultiIndex,
40+
Period,
4041
PeriodIndex,
4142
)
4243
import pandas.core.common as com
@@ -803,6 +804,9 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
803804
allow_fill=levels._can_hold_na,
804805
fill_value=levels._na_value,
805806
)
807+
# GH#60099
808+
if isinstance(values[0], Period):
809+
values = values.to_timestamp()
806810

807811
for i, span_val in spans.items():
808812
mergestart, mergeend = None, None
@@ -827,6 +831,10 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
827831
# Format hierarchical rows with non-merged values.
828832
for indexcolvals in zip(*self.df.index):
829833
for idx, indexcolval in enumerate(indexcolvals):
834+
# GH#60099
835+
if isinstance(indexcolval, Period):
836+
indexcolval = indexcolval.to_timestamp()
837+
830838
yield CssExcelCell(
831839
row=self.rowcounter + idx,
832840
col=gcolidx,

pandas/io/formats/format.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1749,7 +1749,7 @@ def _trim_zeros_complex(str_complexes: ArrayLike, decimal: str = ".") -> list[st
17491749
# The split will give [{"", "-"}, "xxx", "+/-", "xxx", "j", ""]
17501750
# Therefore, the imaginary part is the 4th and 3rd last elements,
17511751
# and the real part is everything before the imaginary part
1752-
trimmed = re.split(r"([j+-])", x)
1752+
trimmed = re.split(r"(?<!e)([j+-])", x)
17531753
real_part.append("".join(trimmed[:-4]))
17541754
imag_part.append("".join(trimmed[-4:-2]))
17551755

pandas/io/formats/style_render.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -868,7 +868,8 @@ def _translate_latex(self, d: dict, clines: str | None) -> None:
868868
or multirow sparsification (so that \multirow and \multicol work correctly).
869869
"""
870870
index_levels = self.index.nlevels
871-
visible_index_level_n = index_levels - sum(self.hide_index_)
871+
# GH 52218
872+
visible_index_level_n = max(1, index_levels - sum(self.hide_index_))
872873
d["head"] = [
873874
[
874875
{**col, "cellstyle": self.ctx_columns[r, c - visible_index_level_n]}

pandas/tests/frame/methods/test_shift.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,3 +757,12 @@ def test_shift_with_offsets_freq_empty(self):
757757
df_shifted = DataFrame(index=shifted_dates)
758758
result = df.shift(freq=offset)
759759
tm.assert_frame_equal(result, df_shifted)
760+
761+
def test_series_shift_interval_preserves_closed(self):
762+
# GH#60389
763+
ser = Series(
764+
[pd.Interval(1, 2, closed="right"), pd.Interval(2, 3, closed="right")]
765+
)
766+
result = ser.shift(1)
767+
expected = Series([np.nan, pd.Interval(1, 2, closed="right")])
768+
tm.assert_series_equal(result, expected)

pandas/tests/io/excel/test_style.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99

1010
from pandas import (
1111
DataFrame,
12+
MultiIndex,
13+
Timestamp,
14+
period_range,
1215
read_excel,
1316
)
1417
import pandas._testing as tm
@@ -333,3 +336,26 @@ def test_styler_to_s3(s3_public_bucket, s3so):
333336
f"s3://{mock_bucket_name}/{target_file}", index_col=0, storage_options=s3so
334337
)
335338
tm.assert_frame_equal(result, df)
339+
340+
341+
@pytest.mark.parametrize("merge_cells", [True, False, "columns"])
342+
def test_format_hierarchical_rows_periodindex(merge_cells):
343+
# GH#60099
344+
df = DataFrame(
345+
{"A": [1, 2]},
346+
index=MultiIndex.from_arrays(
347+
[
348+
period_range(start="2006-10-06", end="2006-10-07", freq="D"),
349+
["X", "Y"],
350+
],
351+
names=["date", "category"],
352+
),
353+
)
354+
formatter = ExcelFormatter(df, merge_cells=merge_cells)
355+
formatted_cells = formatter._format_hierarchical_rows()
356+
357+
for cell in formatted_cells:
358+
if cell.row != 0 and cell.col == 0:
359+
assert isinstance(
360+
cell.val, Timestamp
361+
), "Period should be converted to Timestamp"

pandas/tests/io/excel/test_writers.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
MultiIndex,
2424
date_range,
2525
option_context,
26+
period_range,
2627
)
2728
import pandas._testing as tm
2829

@@ -335,6 +336,43 @@ def test_multiindex_interval_datetimes(self, tmp_excel):
335336
)
336337
tm.assert_frame_equal(result, expected)
337338

339+
@pytest.mark.parametrize("merge_cells", [True, False, "columns"])
340+
def test_excel_round_trip_with_periodindex(self, tmp_excel, merge_cells):
341+
# GH#60099
342+
df = DataFrame(
343+
{"A": [1, 2]},
344+
index=MultiIndex.from_arrays(
345+
[
346+
period_range(start="2006-10-06", end="2006-10-07", freq="D"),
347+
["X", "Y"],
348+
],
349+
names=["date", "category"],
350+
),
351+
)
352+
df.to_excel(tmp_excel, merge_cells=merge_cells)
353+
result = pd.read_excel(tmp_excel, index_col=[0, 1])
354+
expected = DataFrame(
355+
{"A": [1, 2]},
356+
MultiIndex.from_arrays(
357+
[
358+
[
359+
pd.to_datetime("2006-10-06 00:00:00"),
360+
pd.to_datetime("2006-10-07 00:00:00"),
361+
],
362+
["X", "Y"],
363+
],
364+
names=["date", "category"],
365+
),
366+
)
367+
time_format = (
368+
"datetime64[s]" if tmp_excel.endswith(".ods") else "datetime64[us]"
369+
)
370+
expected.index = expected.index.set_levels(
371+
expected.index.levels[0].astype(time_format), level=0
372+
)
373+
374+
tm.assert_frame_equal(result, expected)
375+
338376

339377
@pytest.mark.parametrize(
340378
"engine,ext",

0 commit comments

Comments
 (0)