Skip to content

Commit 85f45ff

Browse files
committed
PERF: MultiIndex._engine.get_loc() handles non-unique fine
1 parent 6e1e1e4 commit 85f45ff

File tree

5 files changed

+17
-20
lines changed

5 files changed

+17
-20
lines changed

doc/source/whatsnew/v0.24.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,7 @@ Performance Improvements
501501
- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`)
502502
- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`)
503503
- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`, :issue:`21606`)
504+
- Improved performance of :func:`MultiIndex.get_loc` for non-unique indexes, which as a consequence does not emit a ``PerformanceWarning`` any more (:issue:`19464`)
504505
- Improved performance of membership checks in :class:`Categorical` and :class:`CategoricalIndex`
505506
(i.e. ``x in cat``-style checks are much faster). :meth:`CategoricalIndex.contains`
506507
is likewise much faster (:issue:`21369`, :issue:`21508`)

pandas/core/indexes/multi.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from pandas.core.dtypes.common import (
1717
ensure_int64,
1818
ensure_platform_int,
19+
is_integer,
1920
is_categorical_dtype,
2021
is_object_dtype,
2122
is_hashable,
@@ -2197,10 +2198,16 @@ def _maybe_to_slice(loc):
21972198
raise KeyError('Key length ({0}) exceeds index depth ({1})'
21982199
''.format(keylen, self.nlevels))
21992200

2200-
if keylen == self.nlevels and self.is_unique:
2201-
return self._engine.get_loc(key)
2201+
# If the index is monotonic, the code for partial selection or
2202+
# non-unique index (below) is more efficient than the following:
2203+
if keylen == self.nlevels and not self.is_monotonic:
2204+
loc = self._engine.get_loc(key)
2205+
if not self.is_unique and is_integer(loc):
2206+
# Indexers expect a slice from indexing a non-unique index
2207+
loc = slice(loc, loc + 1)
2208+
return loc
22022209

2203-
# -- partial selection or non-unique index
2210+
# -- partial selection or non-unique index or monotonic index
22042211
# break the key into 2 parts based on the lexsort_depth of the index;
22052212
# the first part returns a continuous slice of the index; the 2nd part
22062213
# needs linear search within the slice
@@ -2213,6 +2220,10 @@ def _maybe_to_slice(loc):
22132220
raise KeyError(key)
22142221

22152222
if not follow_key:
2223+
# Indexers expect an integer from indexing a key in a unique index
2224+
if self.is_unique:
2225+
# Breaks if we pass a np.int64. TODO: investigate why
2226+
return int(start)
22162227
return slice(start, stop)
22172228

22182229
warnings.warn('indexing past lexsort depth may impact performance.',

pandas/tests/frame/test_sort_values_level_as_str.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import pytest
33

44
from pandas import DataFrame, Index
5-
from pandas.errors import PerformanceWarning
65
from pandas.util import testing as tm
76
from pandas.util.testing import assert_frame_equal
87

@@ -85,14 +84,7 @@ def test_sort_column_level_and_index_label(
8584
ascending=ascending,
8685
axis=1)
8786

88-
if len(levels) > 1:
89-
# Accessing multi-level columns that are not lexsorted raises a
90-
# performance warning
91-
with tm.assert_produces_warning(PerformanceWarning,
92-
check_stacklevel=False):
93-
assert_frame_equal(result, expected)
94-
else:
95-
assert_frame_equal(result, expected)
87+
assert_frame_equal(result, expected)
9688

9789

9890
def test_sort_values_column_index_level_precedence():

pandas/tests/indexing/test_ix.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from pandas.compat import lrange
1212
from pandas import Series, DataFrame, option_context, MultiIndex
1313
from pandas.util import testing as tm
14-
from pandas.errors import PerformanceWarning
1514

1615

1716
class TestIX(object):
@@ -187,9 +186,7 @@ def test_ix_general(self):
187186
df = DataFrame(data).set_index(keys=['col', 'year'])
188187
key = 4.0, 2012
189188

190-
# emits a PerformanceWarning, ok
191-
with tm.assert_produces_warning(PerformanceWarning):
192-
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
189+
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
193190

194191
# this is ok
195192
df.sort_index(inplace=True)

pandas/tests/indexing/test_multiindex.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -366,10 +366,6 @@ def test_multiindex_perf_warn(self):
366366
'joe': ['x', 'x', 'z', 'y'],
367367
'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])
368368

369-
with tm.assert_produces_warning(PerformanceWarning,
370-
clear=[pd.core.index]):
371-
df.loc[(1, 'z')]
372-
373369
df = df.iloc[[2, 1, 3, 0]]
374370
with tm.assert_produces_warning(PerformanceWarning):
375371
df.loc[(0, )]

0 commit comments

Comments
 (0)