Skip to content

Commit f91c56b

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into cln-arith
2 parents 113b58a + 4e55346 commit f91c56b

File tree

15 files changed

+100
-63
lines changed

15 files changed

+100
-63
lines changed

doc/source/whatsnew/v1.2.0.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ Performance improvements
285285
- ``Styler`` uuid method altered to compress data transmission over web whilst maintaining reasonably low table collision probability (:issue:`36345`)
286286
- Performance improvement in :meth:`pd.to_datetime` with non-ns time unit for ``float`` ``dtype`` columns (:issue:`20445`)
287287
- Performance improvement in setting values on a :class:`IntervalArray` (:issue:`36310`)
288+
- The internal index method :meth:`~Index._shallow_copy` now makes the new index and original index share cached attributes,
289+
so that they are not recomputed if they have already been created on either one. This can speed up operations that depend on creating copies of existing indexes (:issue:`36840`)
288290
- Performance improvement in :meth:`RollingGroupby.count` (:issue:`35625`)
289291

290292
.. ---------------------------------------------------------------------------
@@ -435,7 +437,7 @@ ExtensionArray
435437

436438
- Fixed bug where :class:`DataFrame` column set to scalar extension type via a dict instantiation was considered an object type rather than the extension type (:issue:`35965`)
437439
- Fixed bug where ``astype()`` with equal dtype and ``copy=False`` would return a new object (:issue:`28488`)
438-
-
440+
- Fixed bug when applying a NumPy ufunc with multiple outputs to a :class:`pandas.arrays.IntegerArray` returning None (:issue:`36913`)
439441

440442

441443
Other

pandas/core/arrays/integer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ def reconstruct(x):
412412

413413
result = getattr(ufunc, method)(*inputs2, **kwargs)
414414
if isinstance(result, tuple):
415-
tuple(reconstruct(x) for x in result)
415+
return tuple(reconstruct(x) for x in result)
416416
else:
417417
return reconstruct(result)
418418

pandas/core/computation/expressions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ def where(cond, a, b, use_numexpr=True):
248248
use_numexpr : bool, default True
249249
Whether to try to use numexpr.
250250
"""
251+
assert _where is not None
251252
return _where(cond, a, b) if use_numexpr else _where_standard(cond, a, b)
252253

253254

pandas/core/computation/pytables.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,10 @@ class Term(ops.Term):
4242
env: PyTablesScope
4343

4444
def __new__(cls, name, env, side=None, encoding=None):
45-
klass = Constant if not isinstance(name, str) else cls
45+
if isinstance(name, str):
46+
klass = cls
47+
else:
48+
klass = Constant
4649
return object.__new__(klass)
4750

4851
def __init__(self, name, env: PyTablesScope, side=None, encoding=None):
@@ -83,6 +86,7 @@ class BinOp(ops.BinOp):
8386

8487
op: str
8588
queryables: Dict[str, Any]
89+
condition: Optional[str]
8690

8791
def __init__(self, op: str, lhs, rhs, queryables: Dict[str, Any], encoding):
8892
super().__init__(op, lhs, rhs)
@@ -184,10 +188,8 @@ def convert_value(self, v) -> "TermValue":
184188

185189
def stringify(value):
186190
if self.encoding is not None:
187-
encoder = partial(pprint_thing_encoded, encoding=self.encoding)
188-
else:
189-
encoder = pprint_thing
190-
return encoder(value)
191+
return pprint_thing_encoded(value, encoding=self.encoding)
192+
return pprint_thing(value)
191193

192194
kind = ensure_decoded(self.kind)
193195
meta = ensure_decoded(self.meta)
@@ -257,9 +259,11 @@ def __repr__(self) -> str:
257259
def invert(self):
258260
""" invert the filter """
259261
if self.filter is not None:
260-
f = list(self.filter)
261-
f[1] = self.generate_filter_op(invert=True)
262-
self.filter = tuple(f)
262+
self.filter = (
263+
self.filter[0],
264+
self.generate_filter_op(invert=True),
265+
self.filter[2],
266+
)
263267
return self
264268

265269
def format(self):

pandas/core/indexes/base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -561,12 +561,12 @@ def _shallow_copy(self, values=None, name: Label = no_default):
561561
name : Label, defaults to self.name
562562
"""
563563
name = self.name if name is no_default else name
564-
cache = self._cache.copy() if values is None else {}
565-
if values is None:
566-
values = self._values
567564

568-
result = self._simple_new(values, name=name)
569-
result._cache = cache
565+
if values is not None:
566+
return self._simple_new(values, name=name)
567+
568+
result = self._simple_new(self._values, name=name)
569+
result._cache = self._cache
570570
return result
571571

572572
def is_(self, other) -> bool:

pandas/core/indexes/datetimelike.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -671,17 +671,15 @@ def _with_freq(self, freq):
671671

672672
def _shallow_copy(self, values=None, name: Label = lib.no_default):
673673
name = self.name if name is lib.no_default else name
674-
cache = self._cache.copy() if values is None else {}
675674

676-
if values is None:
677-
values = self._data
678-
679-
if isinstance(values, np.ndarray):
675+
if values is not None:
680676
# TODO: We would rather not get here
681-
values = type(self._data)(values, dtype=self.dtype)
677+
if isinstance(values, np.ndarray):
678+
values = type(self._data)(values, dtype=self.dtype)
679+
return self._simple_new(values, name=name)
682680

683-
result = type(self)._simple_new(values, name=name)
684-
result._cache = cache
681+
result = self._simple_new(self._data, name=name)
682+
result._cache = self._cache
685683
return result
686684

687685
# --------------------------------------------------------------------

pandas/core/indexes/interval.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -335,12 +335,12 @@ def _shallow_copy(
335335
self, values: Optional[IntervalArray] = None, name: Label = lib.no_default
336336
):
337337
name = self.name if name is lib.no_default else name
338-
cache = self._cache.copy() if values is None else {}
339-
if values is None:
340-
values = self._data
341338

342-
result = self._simple_new(values, name=name)
343-
result._cache = cache
339+
if values is not None:
340+
return self._simple_new(values, name=name)
341+
342+
result = self._simple_new(self._data, name=name)
343+
result._cache = self._cache
344344
return result
345345

346346
@cache_readonly

pandas/core/indexes/period.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -260,12 +260,12 @@ def _has_complex_internals(self) -> bool:
260260

261261
def _shallow_copy(self, values=None, name: Label = no_default):
262262
name = name if name is not no_default else self.name
263-
cache = self._cache.copy() if values is None else {}
264-
if values is None:
265-
values = self._data
266263

267-
result = self._simple_new(values, name=name)
268-
result._cache = cache
264+
if values is not None:
265+
return self._simple_new(values, name=name)
266+
267+
result = self._simple_new(self._data, name=name)
268+
result._cache = self._cache
269269
return result
270270

271271
def _maybe_convert_timedelta(self, other):

pandas/core/indexes/range.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -397,13 +397,13 @@ def __iter__(self):
397397
def _shallow_copy(self, values=None, name: Label = no_default):
398398
name = self.name if name is no_default else name
399399

400-
if values is None:
401-
result = self._simple_new(self._range, name=name)
402-
result._cache = self._cache.copy()
403-
return result
404-
else:
400+
if values is not None:
405401
return Int64Index._simple_new(values, name=name)
406402

403+
result = self._simple_new(self._range, name=name)
404+
result._cache = self._cache
405+
return result
406+
407407
@doc(Int64Index.copy)
408408
def copy(self, name=None, deep=False, dtype=None, names=None):
409409
name = self._validate_names(name=name, names=names, deep=deep)[0]

pandas/core/window/rolling.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ def _insert_on_column(self, result: "DataFrame", obj: "DataFrame"):
398398

399399
if self.on is not None and not self._on.equals(obj.index):
400400
name = self._on.name
401-
extra_col = Series(self._on, index=obj.index, name=name)
401+
extra_col = Series(self._on, index=self.obj.index, name=name)
402402
if name in result.columns:
403403
# TODO: sure we want to overwrite results?
404404
result[name] = extra_col
@@ -2263,7 +2263,7 @@ def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer:
22632263
"""
22642264
rolling_indexer: Type[BaseIndexer]
22652265
indexer_kwargs: Optional[Dict] = None
2266-
index_array = self.obj.index.asi8
2266+
index_array = self._on.asi8
22672267
if isinstance(self.window, BaseIndexer):
22682268
rolling_indexer = type(self.window)
22692269
indexer_kwargs = self.window.__dict__

pandas/tests/arrays/integer/test_function.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,20 @@ def test_ufuncs_binary_int(ufunc):
6464
tm.assert_extension_array_equal(result, expected)
6565

6666

67+
def test_ufunc_binary_output():
68+
a = integer_array([1, 2, np.nan])
69+
result = np.modf(a)
70+
expected = np.modf(a.to_numpy(na_value=np.nan, dtype="float"))
71+
72+
assert isinstance(result, tuple)
73+
assert len(result) == 2
74+
75+
for x, y in zip(result, expected):
76+
# TODO(FloatArray): This will return an extension array.
77+
# y = integer_array(y)
78+
tm.assert_numpy_array_equal(x, y)
79+
80+
6781
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
6882
def test_ufunc_reduce_raises(values):
6983
a = integer_array(values)

pandas/tests/base/test_misc.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,8 @@ def test_memory_usage(index_or_series_obj):
128128
)
129129

130130
if len(obj) == 0:
131-
assert res_deep == res == 0
131+
expected = 0 if isinstance(obj, Index) else 80
132+
assert res_deep == res == expected
132133
elif is_object or is_categorical:
133134
# only deep will pick them up
134135
assert res_deep > res

pandas/tests/indexes/common.py

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -935,28 +935,22 @@ def test_contains_requires_hashable_raises(self):
935935
with pytest.raises(TypeError, match=msg):
936936
{} in idx._engine
937937

938-
def test_copy_copies_cache(self):
939-
# GH32898
938+
def test_copy_shares_cache(self):
939+
# GH32898, GH36840
940940
idx = self.create_index()
941941
idx.get_loc(idx[0]) # populates the _cache.
942942
copy = idx.copy()
943943

944-
# check that the copied cache is a copy of the original
945-
assert idx._cache == copy._cache
946-
assert idx._cache is not copy._cache
947-
# cache values should reference the same object
948-
for key, val in idx._cache.items():
949-
assert copy._cache[key] is val, key
944+
assert copy._cache is idx._cache
950945

951-
def test_shallow_copy_copies_cache(self):
952-
# GH32669
946+
def test_shallow_copy_shares_cache(self):
947+
# GH32669, GH36840
953948
idx = self.create_index()
954949
idx.get_loc(idx[0]) # populates the _cache.
955950
shallow_copy = idx._shallow_copy()
956951

957-
# check that the shallow_copied cache is a copy of the original
958-
assert idx._cache == shallow_copy._cache
959-
assert idx._cache is not shallow_copy._cache
960-
# cache values should reference the same object
961-
for key, val in idx._cache.items():
962-
assert shallow_copy._cache[key] is val, key
952+
assert shallow_copy._cache is idx._cache
953+
954+
shallow_copy = idx._shallow_copy(idx._data)
955+
assert shallow_copy._cache is not idx._cache
956+
assert shallow_copy._cache == {}

pandas/tests/window/test_grouper.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,3 +428,32 @@ def test_groupby_rolling_empty_frame(self):
428428
result = expected.groupby(["s1", "s2"]).rolling(window=1).sum()
429429
expected.index = pd.MultiIndex.from_tuples([], names=["s1", "s2", None])
430430
tm.assert_frame_equal(result, expected)
431+
432+
def test_groupby_rolling_string_index(self):
433+
# GH: 36727
434+
df = pd.DataFrame(
435+
[
436+
["A", "group_1", pd.Timestamp(2019, 1, 1, 9)],
437+
["B", "group_1", pd.Timestamp(2019, 1, 2, 9)],
438+
["Z", "group_2", pd.Timestamp(2019, 1, 3, 9)],
439+
["H", "group_1", pd.Timestamp(2019, 1, 6, 9)],
440+
["E", "group_2", pd.Timestamp(2019, 1, 20, 9)],
441+
],
442+
columns=["index", "group", "eventTime"],
443+
).set_index("index")
444+
445+
groups = df.groupby("group")
446+
df["count_to_date"] = groups.cumcount()
447+
rolling_groups = groups.rolling("10d", on="eventTime")
448+
result = rolling_groups.apply(lambda df: df.shape[0])
449+
expected = pd.DataFrame(
450+
[
451+
["A", "group_1", pd.Timestamp(2019, 1, 1, 9), 1.0],
452+
["B", "group_1", pd.Timestamp(2019, 1, 2, 9), 2.0],
453+
["H", "group_1", pd.Timestamp(2019, 1, 6, 9), 3.0],
454+
["Z", "group_2", pd.Timestamp(2019, 1, 3, 9), 1.0],
455+
["E", "group_2", pd.Timestamp(2019, 1, 20, 9), 1.0],
456+
],
457+
columns=["index", "group", "eventTime", "count_to_date"],
458+
).set_index(["group", "index"])
459+
tm.assert_frame_equal(result, expected)

setup.cfg

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,9 @@ check_untyped_defs=False
154154
[mypy-pandas.core.computation.expr]
155155
check_untyped_defs=False
156156

157-
[mypy-pandas.core.computation.expressions]
158-
check_untyped_defs=False
159-
160157
[mypy-pandas.core.computation.ops]
161158
check_untyped_defs=False
162159

163-
[mypy-pandas.core.computation.pytables]
164-
check_untyped_defs=False
165-
166160
[mypy-pandas.core.computation.scope]
167161
check_untyped_defs=False
168162

0 commit comments

Comments
 (0)