Skip to content

Commit a625bd4

Browse files
Fix Index.union and get_indexer_non_unique bugs exposed by my categorical index fixes
1 parent d2b95ed commit a625bd4

File tree

3 files changed

+26
-7
lines changed

3 files changed

+26
-7
lines changed

pandas/_libs/index.pyx

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -304,12 +304,20 @@ cdef class IndexEngine:
304304
if stargets and len(stargets) < 5 and self.is_monotonic_increasing:
305305
# if there are few enough stargets and the index is monotonically
306306
# increasing, then use binary search for each starget
307+
remaining_stargets = set()
307308
for starget in stargets:
308-
start = values.searchsorted(starget, side='left')
309-
end = values.searchsorted(starget, side='right')
310-
if start != end:
311-
d[starget] = list(range(start, end))
312-
else:
309+
try:
310+
start = values.searchsorted(starget, side='left')
311+
end = values.searchsorted(starget, side='right')
312+
except TypeError:  # e.g. if we tried to search for a string in an int array
313+
remaining_stargets.add(starget)
314+
else:
315+
if start != end:
316+
d[starget] = list(range(start, end))
317+
318+
stargets = remaining_stargets
319+
320+
if stargets:
313321
# map any stargets not resolved above by iterating through all items in the index
314322
for i in range(n):
315323
val = values[i]

pandas/core/indexes/base.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2489,8 +2489,14 @@ def _union(self, other, sort):
24892489
value_set = set(lvals)
24902490
result.extend([x for x in rvals if x not in value_set])
24912491
else:
2492-
indexer = self.get_indexer(other)
2493-
indexer, = (indexer == -1).nonzero()
2492+
# find the positions of the elements of "other" that are not in "self"
2493+
try:
2494+
indexer = self.get_indexer(other)
2495+
except InvalidIndexError:
2496+
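# get_indexer raised (presumably because of duplicates); fall back to the "missing" positions from get_indexer_non_unique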
2497+
indexer = algos.unique1d(self.get_indexer_non_unique(other)[1])
2498+
else:
2499+
indexer, = (indexer == -1).nonzero()
24942500

24952501
if len(indexer) > 0:
24962502
other_diff = algos.take_nd(rvals, indexer, allow_fill=False)

pandas/tests/test_base.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,11 @@ def test_bool_indexing(self, indexer_klass, indexer):
990990
s = pd.Series(idx)
991991
tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx])
992992

993+
def test_get_indexer_non_unique_dtype_mismatch(self):
994+
indexes, missing = pd.Index(['A', 'B']).get_indexer_non_unique(pd.Index([0]))
995+
tm.assert_numpy_array_equal(np.array([], dtype=np.intp), indexes)
996+
tm.assert_numpy_array_equal(np.array([0, 1], dtype=np.intp), missing)
997+
993998

994999
class TestTranspose(Ops):
9951000
errmsg = "the 'axes' parameter is not supported"

0 commit comments

Comments
 (0)