Skip to content

Commit 1f98ba7

Browse files
committed
Merge remote-tracking branch 'upstream/main' into pre-commit-ci-update-config
2 parents 4c73129 + 199bf20 commit 1f98ba7

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+227
-222
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -470,24 +470,16 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
470470
-i "pandas.merge PR07" \
471471
-i "pandas.merge_asof PR07,RT03" \
472472
-i "pandas.merge_ordered PR07" \
473-
-i "pandas.option_context SA01" \
474473
-i "pandas.period_range RT03,SA01" \
475474
-i "pandas.pivot PR07" \
476475
-i "pandas.pivot_table PR07" \
477476
-i "pandas.plotting.andrews_curves RT03,SA01" \
478-
-i "pandas.plotting.autocorrelation_plot RT03,SA01" \
479477
-i "pandas.plotting.lag_plot RT03,SA01" \
480-
-i "pandas.plotting.parallel_coordinates PR07,RT03,SA01" \
481478
-i "pandas.plotting.scatter_matrix PR07,SA01" \
482-
-i "pandas.plotting.table PR07,RT03,SA01" \
483479
-i "pandas.qcut PR07,SA01" \
484-
-i "pandas.read_orc SA01" \
485480
-i "pandas.read_spss SA01" \
486-
-i "pandas.reset_option SA01" \
487481
-i "pandas.set_eng_float_format RT03,SA01" \
488-
-i "pandas.show_versions SA01" \
489482
-i "pandas.testing.assert_extension_array_equal SA01" \
490-
-i "pandas.testing.assert_series_equal PR07,SA01" \
491483
-i "pandas.tseries.offsets.BDay PR02,SA01" \
492484
-i "pandas.tseries.offsets.BQuarterBegin PR02" \
493485
-i "pandas.tseries.offsets.BQuarterBegin.freqstr SA01" \
@@ -779,7 +771,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
779771
-i "pandas.tseries.offsets.YearEnd.nanos GL08" \
780772
-i "pandas.tseries.offsets.YearEnd.normalize GL08" \
781773
-i "pandas.tseries.offsets.YearEnd.rule_code GL08" \
782-
-i "pandas.unique PR07" \
783774
-i "pandas.util.hash_pandas_object PR07,SA01" # There should be no backslash in the final line, please keep this comment in the last ignored function
784775

785776
RET=$(($RET + $?)) ; echo $MSG "DONE"

doc/source/user_guide/missing_data.rst

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -337,10 +337,8 @@ When taking the product, NA values or empty data will be treated as 1.
337337
pd.Series([], dtype="float64").prod()
338338
339339
Cumulative methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod`
340-
ignore NA values by default preserve them in the result. This behavior can be changed
341-
with ``skipna``
342-
343-
* Cumulative methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` ignore NA values by default, but preserve them in the resulting arrays. To override this behaviour and include NA values, use ``skipna=False``.
340+
ignore NA values by default, but preserve them in the resulting array. To override
341+
this behaviour and include NA values in the calculation, use ``skipna=False``.
344342

345343

346344
.. ipython:: python

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ Other enhancements
4444
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
4545
- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
4646
- :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
47+
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
4748
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
48-
-
4949

5050
.. ---------------------------------------------------------------------------
5151
.. _whatsnew_300.notable_bug_fixes:
@@ -311,6 +311,7 @@ Removal of prior version deprecations/changes
311311
- Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep=r"\s+"`` instead (:issue:`55569`)
312312
- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`)
313313
- Stopped automatically casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`)
314+
- Stopped performing dtype inference in :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when given a pandas object (:class:`Series`, :class:`Index`, :class:`ExtensionArray`), call ``.infer_objects`` on the input to keep the current behavior (:issue:`56012`)
314315
- Stopped performing dtype inference when setting a :class:`Index` into a :class:`DataFrame` (:issue:`56102`)
315316
- Stopped performing dtype inference with in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)
316317
- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)
@@ -536,7 +537,7 @@ Groupby/resample/rolling
536537
- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
537538
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
538539
- Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
539-
540+
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
540541

541542
Reshaping
542543
^^^^^^^^^

pandas/_config/config.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,12 @@ def reset_option(pat: str) -> None:
323323
None
324324
No return value.
325325
326+
See Also
327+
--------
328+
get_option : Retrieve the value of the specified option.
329+
set_option : Set the value of the specified option or options.
330+
describe_option : Print the description for one or more registered options.
331+
326332
Notes
327333
-----
328334
For all available options, please view the
@@ -414,6 +420,13 @@ def option_context(*args) -> Generator[None, None, None]:
414420
None
415421
No return value.
416422
423+
See Also
424+
--------
425+
get_option : Retrieve the value of the specified option.
426+
set_option : Set the value of the specified option.
427+
reset_option : Reset one or more options to their default value.
428+
describe_option : Print the description for one or more registered options.
429+
417430
Notes
418431
-----
419432
For all available options, please view the :ref:`User Guide <options.available>`

pandas/_testing/__init__.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
ContextManager,
1111
cast,
1212
)
13-
import warnings
1413

1514
import numpy as np
1615

@@ -290,17 +289,11 @@ def box_expected(expected, box_cls, transpose: bool = True):
290289
else:
291290
expected = pd.array(expected, copy=False)
292291
elif box_cls is Index:
293-
with warnings.catch_warnings():
294-
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
295-
expected = Index(expected)
292+
expected = Index(expected)
296293
elif box_cls is Series:
297-
with warnings.catch_warnings():
298-
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
299-
expected = Series(expected)
294+
expected = Series(expected)
300295
elif box_cls is DataFrame:
301-
with warnings.catch_warnings():
302-
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
303-
expected = Series(expected).to_frame()
296+
expected = Series(expected).to_frame()
304297
if transpose:
305298
# for vector operations, we need a DataFrame to be a single-row,
306299
# not a single-column, in order to operate against non-DataFrame
@@ -538,8 +531,8 @@ def shares_memory(left, right) -> bool:
538531
left._mask, right._mask
539532
)
540533

541-
if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
542-
arr = left._mgr.arrays[0]
534+
if isinstance(left, DataFrame) and len(left._mgr.blocks) == 1:
535+
arr = left._mgr.blocks[0].values
543536
return shares_memory(arr, right)
544537

545538
raise NotImplementedError(type(left), type(right))

pandas/_testing/asserters.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,7 +857,9 @@ def assert_series_equal(
857857
Parameters
858858
----------
859859
left : Series
860+
First Series to compare.
860861
right : Series
862+
Second Series to compare.
861863
check_dtype : bool, default True
862864
Whether to check the Series dtype is identical.
863865
check_index_type : bool or {'equiv'}, default 'equiv'
@@ -908,6 +910,11 @@ def assert_series_equal(
908910
909911
.. versionadded:: 1.5.0
910912
913+
See Also
914+
--------
915+
testing.assert_index_equal : Check that two Indexes are equal.
916+
testing.assert_frame_equal : Check that two DataFrames are equal.
917+
911918
Examples
912919
--------
913920
>>> from pandas import testing as tm

pandas/core/algorithms.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,8 @@ def unique(values):
319319
Parameters
320320
----------
321321
values : 1d array-like
322+
The input array-like object containing values from which to extract
323+
unique values.
322324
323325
Returns
324326
-------

pandas/core/construction.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ def sanitize_array(
554554
# Avoid ending up with a NumpyExtensionArray
555555
dtype = dtype.numpy_dtype
556556

557-
data_was_index = isinstance(data, ABCIndex)
557+
infer_object = not isinstance(data, (ABCIndex, ABCSeries))
558558

559559
# extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray
560560
data = extract_array(data, extract_numpy=True, extract_range=True)
@@ -607,7 +607,7 @@ def sanitize_array(
607607

608608
if dtype is None:
609609
subarr = data
610-
if data.dtype == object and not data_was_index:
610+
if data.dtype == object and infer_object:
611611
subarr = maybe_infer_to_datetimelike(data)
612612
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
613613
from pandas.core.arrays.string_ import StringDtype

pandas/core/frame.py

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -728,10 +728,6 @@ def __init__(
728728
NDFrame.__init__(self, data)
729729
return
730730

731-
is_pandas_object = isinstance(data, (Series, Index, ExtensionArray))
732-
data_dtype = getattr(data, "dtype", None)
733-
original_dtype = dtype
734-
735731
# GH47215
736732
if isinstance(index, set):
737733
raise ValueError("index cannot be a set")
@@ -896,18 +892,6 @@ def __init__(
896892

897893
NDFrame.__init__(self, mgr)
898894

899-
if original_dtype is None and is_pandas_object and data_dtype == np.object_:
900-
if self.dtypes.iloc[0] != data_dtype:
901-
warnings.warn(
902-
"Dtype inference on a pandas object "
903-
"(Series, Index, ExtensionArray) is deprecated. The DataFrame "
904-
"constructor will keep the original dtype in the future. "
905-
"Call `infer_objects` on the result to get the old "
906-
"behavior.",
907-
FutureWarning,
908-
stacklevel=2,
909-
)
910-
911895
# ----------------------------------------------------------------------
912896

913897
def __dataframe__(
@@ -1062,7 +1046,7 @@ def _is_homogeneous_type(self) -> bool:
10621046
False
10631047
"""
10641048
# The "<" part of "<=" here is for empty DataFrame cases
1065-
return len({arr.dtype for arr in self._mgr.arrays}) <= 1
1049+
return len({block.values.dtype for block in self._mgr.blocks}) <= 1
10661050

10671051
@property
10681052
def _can_fast_transpose(self) -> bool:
@@ -5742,7 +5726,6 @@ def shift(
57425726
periods = cast(int, periods)
57435727

57445728
ncols = len(self.columns)
5745-
arrays = self._mgr.arrays
57465729
if axis == 1 and periods != 0 and ncols > 0 and freq is None:
57475730
if fill_value is lib.no_default:
57485731
# We will infer fill_value to match the closest column
@@ -5768,12 +5751,12 @@ def shift(
57685751

57695752
result.columns = self.columns.copy()
57705753
return result
5771-
elif len(arrays) > 1 or (
5754+
elif len(self._mgr.blocks) > 1 or (
57725755
# If we only have one block and we know that we can't
57735756
# keep the same dtype (i.e. the _can_hold_element check)
57745757
# then we can go through the reindex_indexer path
57755758
# (and avoid casting logic in the Block method).
5776-
not can_hold_element(arrays[0], fill_value)
5759+
not can_hold_element(self._mgr.blocks[0].values, fill_value)
57775760
):
57785761
# GH#35488 we need to watch out for multi-block cases
57795762
# We only get here with fill_value not-lib.no_default
@@ -11469,7 +11452,7 @@ def _get_data() -> DataFrame:
1146911452
if numeric_only:
1147011453
df = _get_data()
1147111454
if axis is None:
11472-
dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
11455+
dtype = find_common_type([block.values.dtype for block in df._mgr.blocks])
1147311456
if isinstance(dtype, ExtensionDtype):
1147411457
df = df.astype(dtype)
1147511458
arr = concat_compat(list(df._iter_column_arrays()))
@@ -11494,7 +11477,9 @@ def _get_data() -> DataFrame:
1149411477

1149511478
# kurtosis excluded since groupby does not implement it
1149611479
if df.shape[1] and name != "kurt":
11497-
dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
11480+
dtype = find_common_type(
11481+
[block.values.dtype for block in df._mgr.blocks]
11482+
)
1149811483
if isinstance(dtype, ExtensionDtype):
1149911484
# GH 54341: fastpath for EA-backed axis=1 reductions
1150011485
# This flattens the frame into a single 1D array while keeping
@@ -11568,8 +11553,8 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
1156811553
else:
1156911554
raise NotImplementedError(name)
1157011555

11571-
for arr in self._mgr.arrays:
11572-
middle = func(arr, axis=0, skipna=skipna)
11556+
for blocks in self._mgr.blocks:
11557+
middle = func(blocks.values, axis=0, skipna=skipna)
1157311558
result = ufunc(result, middle)
1157411559

1157511560
res_ser = self._constructor_sliced(result, index=self.index, copy=False)

pandas/core/generic.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6373,7 +6373,7 @@ def astype(
63736373
# TODO(EA2D): special case not needed with 2D EAs
63746374
dtype = pandas_dtype(dtype)
63756375
if isinstance(dtype, ExtensionDtype) and all(
6376-
arr.dtype == dtype for arr in self._mgr.arrays
6376+
block.values.dtype == dtype for block in self._mgr.blocks
63776377
):
63786378
return self.copy(deep=False)
63796379
# GH 18099/22869: columnwise conversion to extension dtype
@@ -11148,9 +11148,9 @@ def _logical_func(
1114811148
if (
1114911149
self.ndim > 1
1115011150
and axis == 1
11151-
and len(self._mgr.arrays) > 1
11151+
and len(self._mgr.blocks) > 1
1115211152
# TODO(EA2D): special-case not needed
11153-
and all(x.ndim == 2 for x in self._mgr.arrays)
11153+
and all(block.values.ndim == 2 for block in self._mgr.blocks)
1115411154
and not kwargs
1115511155
):
1115611156
# Fastpath avoiding potentially expensive transpose

pandas/core/indexes/base.py

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -490,8 +490,6 @@ def __new__(
490490
if not copy and isinstance(data, (ABCSeries, Index)):
491491
refs = data._references
492492

493-
is_pandas_object = isinstance(data, (ABCSeries, Index, ExtensionArray))
494-
495493
# range
496494
if isinstance(data, (range, RangeIndex)):
497495
result = RangeIndex(start=data, copy=copy, name=name)
@@ -508,7 +506,7 @@ def __new__(
508506
elif is_ea_or_datetimelike_dtype(data_dtype):
509507
pass
510508

511-
elif isinstance(data, (np.ndarray, Index, ABCSeries)):
509+
elif isinstance(data, (np.ndarray, ABCMultiIndex)):
512510
if isinstance(data, ABCMultiIndex):
513511
data = data._values
514512

@@ -518,7 +516,9 @@ def __new__(
518516
# they are actually ints, e.g. '0' and 0.0
519517
# should not be coerced
520518
data = com.asarray_tuplesafe(data, dtype=_dtype_obj)
521-
519+
elif isinstance(data, (ABCSeries, Index)):
520+
# GH 56244: Avoid potential inference on object types
521+
pass
522522
elif is_scalar(data):
523523
raise cls._raise_scalar_data_error(data)
524524
elif hasattr(data, "__array__"):
@@ -571,19 +571,7 @@ def __new__(
571571
klass = cls._dtype_to_subclass(arr.dtype)
572572

573573
arr = klass._ensure_array(arr, arr.dtype, copy=False)
574-
result = klass._simple_new(arr, name, refs=refs)
575-
if dtype is None and is_pandas_object and data_dtype == np.object_:
576-
if result.dtype != data_dtype:
577-
warnings.warn(
578-
"Dtype inference on a pandas object "
579-
"(Series, Index, ExtensionArray) is deprecated. The Index "
580-
"constructor will keep the original dtype in the future. "
581-
"Call `infer_objects` on the result to get the old "
582-
"behavior.",
583-
FutureWarning,
584-
stacklevel=2,
585-
)
586-
return result # type: ignore[return-value]
574+
return klass._simple_new(arr, name, refs=refs)
587575

588576
@classmethod
589577
def _ensure_array(cls, data, dtype, copy: bool):

pandas/core/indexing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1804,10 +1804,10 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc") -> None:
18041804

18051805
# if there is only one block/type, still have to take split path
18061806
# unless the block is one-dimensional or it can hold the value
1807-
if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
1807+
if not take_split_path and len(self.obj._mgr.blocks) and self.ndim > 1:
18081808
# in case of dict, keys are indices
18091809
val = list(value.values()) if isinstance(value, dict) else value
1810-
arr = self.obj._mgr.arrays[0]
1810+
arr = self.obj._mgr.blocks[0].values
18111811
take_split_path = not can_hold_element(
18121812
arr, extract_array(val, extract_numpy=True)
18131813
)

pandas/core/internals/construction.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ def ndarray_to_mgr(
192192
) -> Manager:
193193
# used in DataFrame.__init__
194194
# input must be a ndarray, list, Series, Index, ExtensionArray
195+
infer_object = not isinstance(values, (ABCSeries, Index, ExtensionArray))
195196

196197
if isinstance(values, ABCSeries):
197198
if columns is None:
@@ -287,15 +288,14 @@ def ndarray_to_mgr(
287288
# if we don't have a dtype specified, then try to convert objects
288289
# on the entire block; this is to convert if we have datetimelike's
289290
# embedded in an object type
290-
if dtype is None and is_object_dtype(values.dtype):
291+
if dtype is None and infer_object and is_object_dtype(values.dtype):
291292
obj_columns = list(values)
292293
maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns]
293294
# don't convert (and copy) the objects if no type inference occurs
294295
if any(x is not y for x, y in zip(obj_columns, maybe_datetime)):
295-
dvals_list = [ensure_block_shape(dval, 2) for dval in maybe_datetime]
296296
block_values = [
297-
new_block_2d(dvals_list[n], placement=BlockPlacement(n))
298-
for n in range(len(dvals_list))
297+
new_block_2d(ensure_block_shape(dval, 2), placement=BlockPlacement(n))
298+
for n, dval in enumerate(maybe_datetime)
299299
]
300300
else:
301301
bp = BlockPlacement(slice(len(columns)))

0 commit comments

Comments
 (0)