Skip to content

Commit 57d66a4

Browse files
committed
PERF: MultiIndex._engine.get_loc() handles non-unique fine
1 parent 2299693 commit 57d66a4

File tree

5 files changed

+10
-19
lines changed

5 files changed

+10
-19
lines changed

doc/source/whatsnew/v0.23.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,6 +1066,7 @@ Performance Improvements
10661066
- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)
10671067
- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`)
10681068
- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`)
1069+
- Improved performance of :func:`MultiIndex.get_loc` for non-unique indexes; as a consequence, it no longer emits a ``PerformanceWarning`` (:issue:`19464`)
10691070
- Improved performance of :func:`MultiIndex.remove_unused_levels` when there are no unused levels, at the cost of a reduction in performance when there are (:issue:`19289`)
10701071
- Improved performance of :func:`Index.get_loc` for non-unique indexes (:issue:`19478`)
10711072
- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`)

pandas/core/indexes/multi.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pandas.core.dtypes.common import (
1515
_ensure_int64,
1616
_ensure_platform_int,
17+
is_integer,
1718
is_categorical_dtype,
1819
is_object_dtype,
1920
is_hashable,
@@ -2274,8 +2275,12 @@ def _maybe_to_slice(loc):
22742275
raise KeyError('Key length ({0}) exceeds index depth ({1})'
22752276
''.format(keylen, self.nlevels))
22762277

2277-
if keylen == self.nlevels and self.is_unique:
2278-
return self._engine.get_loc(key)
2278+
if keylen == self.nlevels:
2279+
loc = self._engine.get_loc(key)
2280+
if not self.is_unique and is_integer(loc):
2281+
# Indexers expect a slice when the index is non-unique, so wrap a single integer position in a length-1 slice
2282+
loc = slice(loc, loc + 1)
2283+
return loc
22792284

22802285
# -- partial selection or non-unique index
22812286
# break the key into 2 parts based on the lexsort_depth of the index;

pandas/tests/frame/test_sort_values_level_as_str.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import pytest
33

44
from pandas import DataFrame, Index
5-
from pandas.errors import PerformanceWarning
65
from pandas.util import testing as tm
76
from pandas.util.testing import assert_frame_equal
87

@@ -85,14 +84,7 @@ def test_sort_column_level_and_index_label(
8584
ascending=ascending,
8685
axis=1)
8786

88-
if len(levels) > 1:
89-
# Accessing multi-level columns that are not lexsorted raises a
90-
# performance warning
91-
with tm.assert_produces_warning(PerformanceWarning,
92-
check_stacklevel=False):
93-
assert_frame_equal(result, expected)
94-
else:
95-
assert_frame_equal(result, expected)
87+
assert_frame_equal(result, expected)
9688

9789

9890
def test_sort_values_column_index_level_precedence():

pandas/tests/indexing/test_ix.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from pandas.compat import lrange
1212
from pandas import Series, DataFrame, option_context, MultiIndex
1313
from pandas.util import testing as tm
14-
from pandas.errors import PerformanceWarning
1514

1615

1716
class TestIX(object):
@@ -187,9 +186,7 @@ def test_ix_general(self):
187186
df = DataFrame(data).set_index(keys=['col', 'year'])
188187
key = 4.0, 2012
189188

190-
# emits a PerformanceWarning, ok
191-
with tm.assert_produces_warning(PerformanceWarning):
192-
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
189+
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
193190

194191
# this is ok
195192
df.sort_index(inplace=True)

pandas/tests/indexing/test_multiindex.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -359,10 +359,6 @@ def test_multiindex_perf_warn(self):
359359
'joe': ['x', 'x', 'z', 'y'],
360360
'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])
361361

362-
with tm.assert_produces_warning(PerformanceWarning,
363-
clear=[pd.core.index]):
364-
df.loc[(1, 'z')]
365-
366362
df = df.iloc[[2, 1, 3, 0]]
367363
with tm.assert_produces_warning(PerformanceWarning):
368364
df.loc[(0, )]

0 commit comments

Comments
 (0)