Skip to content

Commit adc9703

Browse files
authored
Merge pull request #173 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 76835fb + 4aeb8f2 commit adc9703

35 files changed

+1440
-930
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ repos:
5151
hooks:
5252
- id: isort
5353
- repo: https://github.com/asottile/pyupgrade
54-
rev: v2.11.0
54+
rev: v2.12.0
5555
hooks:
5656
- id: pyupgrade
5757
args: [--py37-plus]

doc/source/whatsnew/v1.2.4.rst

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.. _whatsnew_124:
22

3-
What's new in 1.2.4 (April ??, 2021)
4-
---------------------------------------
3+
What's new in 1.2.4 (April 12, 2021)
4+
------------------------------------
55

66
These are the changes in pandas 1.2.4. See :ref:`release` for a full changelog
77
including other versions of pandas.
@@ -20,27 +20,8 @@ Fixed regressions
2020
- Fixed regression in (in)equality comparison of ``pd.NaT`` with a non-datetimelike numpy array returning a scalar instead of an array (:issue:`40722`)
2121
- Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`)
2222
- Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`)
23-
-
24-
25-
.. ---------------------------------------------------------------------------
26-
27-
.. _whatsnew_124.bug_fixes:
28-
29-
Bug fixes
30-
~~~~~~~~~
31-
32-
-
33-
-
34-
35-
.. ---------------------------------------------------------------------------
36-
37-
.. _whatsnew_124.other:
38-
39-
Other
40-
~~~~~
41-
42-
-
43-
-
23+
- Fixed regression in repr of floats in an ``object`` column not respecting ``float_format`` when printed in the console or outputted through :meth:`DataFrame.to_string`, :meth:`DataFrame.to_html`, and :meth:`DataFrame.to_latex` (:issue:`40024`)
24+
- Fixed regression in NumPy ufuncs such as ``np.add`` not passing through all arguments for :class:`DataFrame` (:issue:`40662`)
4425

4526
.. ---------------------------------------------------------------------------
4627

doc/source/whatsnew/v1.3.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,7 @@ Performance improvements
584584
- Performance improvement in :class:`Styler` where render times are more than 50% reduced (:issue:`39972`, :issue:`39952`)
585585
- Performance improvement in :meth:`core.window.ewm.ExponentialMovingWindow.mean` with ``times`` (:issue:`39784`)
586586
- Performance improvement in :meth:`.GroupBy.apply` when requiring the python fallback implementation (:issue:`40176`)
587+
- Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`)
587588

588589
.. ---------------------------------------------------------------------------
589590
@@ -786,6 +787,7 @@ Reshaping
786787
^^^^^^^^^
787788
- Bug in :func:`merge` raising error when performing an inner join with partial index and ``right_index`` when no overlap between indices (:issue:`33814`)
788789
- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`)
790+
- Bug in :func:`merge_asof` propagating the right Index with ``left_index=True`` and ``right_on`` specification instead of left Index (:issue:`33463`)
789791
- Bug in :func:`join` over :class:`MultiIndex` returning the wrong result when one of the two indexes had only one level (:issue:`36909`)
790792
- :func:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
791793
- Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`)
@@ -813,6 +815,7 @@ ExtensionArray
813815

814816
- Bug in :meth:`DataFrame.where` when ``other`` is a :class:`Series` with :class:`ExtensionArray` dtype (:issue:`38729`)
815817
- Fixed bug where :meth:`Series.idxmax`, :meth:`Series.idxmin` and ``argmax/min`` fail when the underlying data is :class:`ExtensionArray` (:issue:`32749`, :issue:`33719`, :issue:`36566`)
818+
- Fixed a bug where some properties of subclasses of :class:`PandasExtensionDtype` were improperly cached (:issue:`40329`)
816819
-
817820

818821
Other

pandas/core/arraylike.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,15 +357,17 @@ def reconstruct(result):
357357
# * len(inputs) > 1 is doable when we know that we have
358358
# aligned blocks / dtypes.
359359
inputs = tuple(np.asarray(x) for x in inputs)
360-
result = getattr(ufunc, method)(*inputs)
360+
result = getattr(ufunc, method)(*inputs, **kwargs)
361361
elif self.ndim == 1:
362362
# ufunc(series, ...)
363363
inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
364364
result = getattr(ufunc, method)(*inputs, **kwargs)
365365
else:
366366
# ufunc(dataframe)
367-
if method == "__call__":
367+
if method == "__call__" and not kwargs:
368368
# for np.<ufunc>(..) calls
369+
# kwargs cannot necessarily be handled block-by-block, so only
370+
# take this path if there are no kwargs
369371
mgr = inputs[0]._mgr
370372
result = mgr.apply(getattr(ufunc, method))
371373
else:

pandas/core/dtypes/concat.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,13 @@
3030
)
3131

3232

33-
def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
33+
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
3434
"""
3535
Helper function for `arr.astype(common_dtype)` but handling all special
3636
cases.
3737
"""
38+
if is_dtype_equal(arr.dtype, dtype):
39+
return arr
3840
if (
3941
is_categorical_dtype(arr.dtype)
4042
and isinstance(dtype, np.dtype)
@@ -121,7 +123,7 @@ def is_nonempty(x) -> bool:
121123
# for axis=0
122124
if not single_dtype:
123125
target_dtype = find_common_type([x.dtype for x in to_concat])
124-
to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
126+
to_concat = [cast_to_common_type(arr, target_dtype) for arr in to_concat]
125127

126128
if isinstance(to_concat[0], ExtensionArray):
127129
cls = type(to_concat[0])

pandas/core/dtypes/dtypes.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import pytz
1616

1717
from pandas._libs.interval import Interval
18+
from pandas._libs.properties import cache_readonly
1819
from pandas._libs.tslibs import (
1920
BaseOffset,
2021
NaT,
@@ -81,7 +82,7 @@ class PandasExtensionDtype(ExtensionDtype):
8182
base: DtypeObj | None = None
8283
isbuiltin = 0
8384
isnative = 0
84-
_cache: dict[str_type, PandasExtensionDtype] = {}
85+
_cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
8586

8687
def __str__(self) -> str_type:
8788
"""
@@ -105,7 +106,7 @@ def __getstate__(self) -> dict[str_type, Any]:
105106
@classmethod
106107
def reset_cache(cls) -> None:
107108
""" clear the cache """
108-
cls._cache = {}
109+
cls._cache_dtypes = {}
109110

110111

111112
class CategoricalDtypeType(type):
@@ -177,7 +178,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
177178
str = "|O08"
178179
base = np.dtype("O")
179180
_metadata = ("categories", "ordered")
180-
_cache: dict[str_type, PandasExtensionDtype] = {}
181+
_cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
181182

182183
def __init__(self, categories=None, ordered: Ordered = False):
183184
self._finalize(categories, ordered, fastpath=False)
@@ -355,7 +356,7 @@ def __hash__(self) -> int:
355356
else:
356357
return -2
357358
# We *do* want to include the real self.ordered here
358-
return int(self._hash_categories(self.categories, self.ordered))
359+
return int(self._hash_categories)
359360

360361
def __eq__(self, other: Any) -> bool:
361362
"""
@@ -429,14 +430,17 @@ def __repr__(self) -> str_type:
429430
data = data.rstrip(", ")
430431
return f"CategoricalDtype(categories={data}, ordered={self.ordered})"
431432

432-
@staticmethod
433-
def _hash_categories(categories, ordered: Ordered = True) -> int:
433+
@cache_readonly
434+
def _hash_categories(self) -> int:
434435
from pandas.core.util.hashing import (
435436
combine_hash_arrays,
436437
hash_array,
437438
hash_tuples,
438439
)
439440

441+
categories = self.categories
442+
ordered = self.ordered
443+
440444
if len(categories) and isinstance(categories[0], tuple):
441445
# assumes if any individual category is a tuple, then all are. ATM
442446
# I don't really want to support just some of the categories being
@@ -671,7 +675,7 @@ class DatetimeTZDtype(PandasExtensionDtype):
671675
na_value = NaT
672676
_metadata = ("unit", "tz")
673677
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
674-
_cache: dict[str_type, PandasExtensionDtype] = {}
678+
_cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
675679

676680
def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None):
677681
if isinstance(unit, DatetimeTZDtype):
@@ -837,7 +841,7 @@ class PeriodDtype(dtypes.PeriodDtypeBase, PandasExtensionDtype):
837841
num = 102
838842
_metadata = ("freq",)
839843
_match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
840-
_cache: dict[str_type, PandasExtensionDtype] = {}
844+
_cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
841845

842846
def __new__(cls, freq=None):
843847
"""
@@ -859,12 +863,12 @@ def __new__(cls, freq=None):
859863
freq = cls._parse_dtype_strict(freq)
860864

861865
try:
862-
return cls._cache[freq.freqstr]
866+
return cls._cache_dtypes[freq.freqstr]
863867
except KeyError:
864868
dtype_code = freq._period_dtype_code
865869
u = dtypes.PeriodDtypeBase.__new__(cls, dtype_code)
866870
u._freq = freq
867-
cls._cache[freq.freqstr] = u
871+
cls._cache_dtypes[freq.freqstr] = u
868872
return u
869873

870874
def __reduce__(self):
@@ -1042,7 +1046,7 @@ class IntervalDtype(PandasExtensionDtype):
10421046
_match = re.compile(
10431047
r"(I|i)nterval\[(?P<subtype>[^,]+)(, (?P<closed>(right|left|both|neither)))?\]"
10441048
)
1045-
_cache: dict[str_type, PandasExtensionDtype] = {}
1049+
_cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
10461050

10471051
def __new__(cls, subtype=None, closed: str_type | None = None):
10481052
from pandas.core.dtypes.common import (
@@ -1099,12 +1103,12 @@ def __new__(cls, subtype=None, closed: str_type | None = None):
10991103

11001104
key = str(subtype) + str(closed)
11011105
try:
1102-
return cls._cache[key]
1106+
return cls._cache_dtypes[key]
11031107
except KeyError:
11041108
u = object.__new__(cls)
11051109
u._subtype = subtype
11061110
u._closed = closed
1107-
cls._cache[key] = u
1111+
cls._cache_dtypes[key] = u
11081112
return u
11091113

11101114
@property

pandas/core/generic.py

Lines changed: 112 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7352,16 +7352,124 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace):
73527352
threshold = align_method_FRAME(self, threshold, axis, flex=None)[1]
73537353
return self.where(subset, threshold, axis=axis, inplace=inplace)
73547354

7355+
@overload
7356+
def clip(
7357+
self: FrameOrSeries,
7358+
lower=...,
7359+
upper=...,
7360+
axis: Axis | None = ...,
7361+
inplace: Literal[False] = ...,
7362+
*args,
7363+
**kwargs,
7364+
) -> FrameOrSeries:
7365+
...
7366+
7367+
@overload
7368+
def clip(
7369+
self: FrameOrSeries,
7370+
lower,
7371+
*,
7372+
axis: Axis | None,
7373+
inplace: Literal[True],
7374+
**kwargs,
7375+
) -> None:
7376+
...
7377+
7378+
@overload
7379+
def clip(
7380+
self: FrameOrSeries,
7381+
lower,
7382+
*,
7383+
inplace: Literal[True],
7384+
**kwargs,
7385+
) -> None:
7386+
...
7387+
7388+
@overload
7389+
def clip(
7390+
self: FrameOrSeries,
7391+
*,
7392+
upper,
7393+
axis: Axis | None,
7394+
inplace: Literal[True],
7395+
**kwargs,
7396+
) -> None:
7397+
...
7398+
7399+
@overload
7400+
def clip(
7401+
self: FrameOrSeries,
7402+
*,
7403+
upper,
7404+
inplace: Literal[True],
7405+
**kwargs,
7406+
) -> None:
7407+
...
7408+
7409+
@overload
7410+
def clip(
7411+
self: FrameOrSeries,
7412+
*,
7413+
axis: Axis | None,
7414+
inplace: Literal[True],
7415+
**kwargs,
7416+
) -> None:
7417+
...
7418+
7419+
@overload
7420+
def clip(
7421+
self: FrameOrSeries,
7422+
lower,
7423+
upper,
7424+
axis: Axis | None,
7425+
inplace: Literal[True],
7426+
*args,
7427+
**kwargs,
7428+
) -> None:
7429+
...
7430+
7431+
@overload
7432+
def clip(
7433+
self: FrameOrSeries,
7434+
lower,
7435+
upper,
7436+
*,
7437+
inplace: Literal[True],
7438+
**kwargs,
7439+
) -> None:
7440+
...
7441+
7442+
@overload
7443+
def clip(
7444+
self: FrameOrSeries,
7445+
*,
7446+
inplace: Literal[True],
7447+
**kwargs,
7448+
) -> None:
7449+
...
7450+
7451+
@overload
7452+
def clip(
7453+
self: FrameOrSeries,
7454+
lower=...,
7455+
upper=...,
7456+
axis: Axis | None = ...,
7457+
inplace: bool_t = ...,
7458+
*args,
7459+
**kwargs,
7460+
) -> FrameOrSeries | None:
7461+
...
7462+
73557463
@final
73567464
def clip(
73577465
self: FrameOrSeries,
73587466
lower=None,
73597467
upper=None,
7360-
axis=None,
7468+
axis: Axis | None = None,
73617469
inplace: bool_t = False,
73627470
*args,
73637471
**kwargs,
7364-
) -> FrameOrSeries:
7472+
) -> FrameOrSeries | None:
73657473
"""
73667474
Trim values at input threshold(s).
73677475
@@ -10843,7 +10951,7 @@ def median(
1084310951
@doc(
1084410952
_num_doc,
1084510953
desc="Return the maximum of the values over the requested axis.\n\n"
10846-
"If you want the *index* of the maximum, use ``idxmax``. This is"
10954+
"If you want the *index* of the maximum, use ``idxmax``. This is "
1084710955
"the equivalent of the ``numpy.ndarray`` method ``argmax``.",
1084810956
name1=name1,
1084910957
name2=name2,
@@ -10860,7 +10968,7 @@ def max(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs):
1086010968
@doc(
1086110969
_num_doc,
1086210970
desc="Return the minimum of the values over the requested axis.\n\n"
10863-
"If you want the *index* of the minimum, use ``idxmin``. This is"
10971+
"If you want the *index* of the minimum, use ``idxmin``. This is "
1086410972
"the equivalent of the ``numpy.ndarray`` method ``argmin``.",
1086510973
name1=name1,
1086610974
name2=name2,

0 commit comments

Comments
 (0)