Skip to content

CLN: push key coercion to the indexes with Index._convert_list_indexer #15678

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 21 additions & 65 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from pandas.types.generic import ABCDataFrame, ABCPanel, ABCSeries
from pandas.types.common import (is_integer_dtype,
is_integer, is_float,
is_categorical_dtype,
is_list_like,
is_sequence,
is_iterator,
Expand Down Expand Up @@ -1087,51 +1086,24 @@ def _getitem_iterable(self, key, axis=0):
inds, = key.nonzero()
return self.obj.take(inds, axis=axis, convert=False)
else:
if isinstance(key, Index):
keyarr = labels._convert_index_indexer(key)
else:
keyarr = _asarray_tuplesafe(key)
keyarr = labels._convert_arr_indexer(keyarr)

if is_categorical_dtype(labels):
keyarr = labels._shallow_copy(keyarr)

# have the index handle the indexer and possibly return
# an indexer or raising
indexer = labels._convert_list_indexer(keyarr, kind=self.name)
# Have the index compute an indexer or return None
# if it cannot handle
indexer, keyarr = labels._convert_listlike_indexer(
key, kind=self.name)
if indexer is not None:
return self.obj.take(indexer, axis=axis)

# this is not the most robust, but...
if (isinstance(labels, MultiIndex) and len(keyarr) and
not isinstance(keyarr[0], tuple)):
level = 0
else:
level = None

# existing labels are unique and indexer are unique
if labels.is_unique and Index(keyarr).is_unique:

try:
result = self.obj.reindex_axis(keyarr, axis=axis,
level=level)

# this is an error as we are trying to find
# keys in a multi-index that don't exist
if isinstance(labels, MultiIndex) and level is not None:
if (hasattr(result, 'ndim') and
not np.prod(result.shape) and len(keyarr)):
raise KeyError("cannot index a multi-index axis "
"with these keys")

return result

return self.obj.reindex_axis(keyarr, axis=axis)
except AttributeError:

# Series
if axis != 0:
raise AssertionError('axis must be 0')
return self.obj.reindex(keyarr, level=level)
return self.obj.reindex(keyarr)

# existing labels are non-unique
else:
Expand Down Expand Up @@ -1225,49 +1197,33 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):

if is_nested_tuple(obj, labels):
return labels.get_locs(obj)

elif is_list_like_indexer(obj):

if is_bool_indexer(obj):
obj = check_bool_indexer(labels, obj)
inds, = obj.nonzero()
return inds
else:
if isinstance(obj, Index):
# want Index objects to pass through untouched
objarr = obj
else:
objarr = _asarray_tuplesafe(obj)

# The index may want to handle a list indexer differently
# by returning an indexer or raising
indexer = labels._convert_list_indexer(objarr, kind=self.name)
# Have the index compute an indexer or return None
# if it cannot handle
indexer, objarr = labels._convert_listlike_indexer(
obj, kind=self.name)
if indexer is not None:
return indexer

# this is not the most robust, but...
if (isinstance(labels, MultiIndex) and
not isinstance(objarr[0], tuple)):
level = 0
_, indexer = labels.reindex(objarr, level=level)
# unique index
if labels.is_unique:
indexer = check = labels.get_indexer(objarr)

# take all
if indexer is None:
indexer = np.arange(len(labels))

check = labels.levels[0].get_indexer(objarr)
# non-unique (dups)
else:
level = None

# unique index
if labels.is_unique:
indexer = check = labels.get_indexer(objarr)

# non-unique (dups)
else:
(indexer,
missing) = labels.get_indexer_non_unique(objarr)
# 'indexer' has dupes, create 'check' using 'missing'
check = np.zeros_like(objarr)
check[missing] = -1
(indexer,
missing) = labels.get_indexer_non_unique(objarr)
# 'indexer' has dupes, create 'check' using 'missing'
check = np.zeros_like(objarr)
check[missing] = -1

mask = check == -1
if mask.any():
Expand Down
37 changes: 37 additions & 0 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1339,6 +1339,27 @@ def is_int(v):

return indexer

def _convert_listlike_indexer(self, keyarr, kind=None):
    """
    Coerce a list-like key, then try to turn it into an indexer.

    Parameters
    ----------
    keyarr : list-like
        Indexer to convert.
    kind : optional
        Forwarded unchanged to ``_convert_list_indexer``.

    Returns
    -------
    tuple (indexer, keyarr)
        indexer is the result of ``_convert_list_indexer`` on the
        coerced keys (an ndarray, or None if it cannot convert)
        keyarr are the coerced, tuple-safe keys
    """
    # Index keys and other list-likes are coerced by different hooks so
    # that subclasses can override each one separately.
    if isinstance(keyarr, Index):
        coerce = self._convert_index_indexer
    else:
        coerce = self._convert_arr_indexer
    keys = coerce(keyarr)

    return self._convert_list_indexer(keys, kind=kind), keys

_index_shared_docs['_convert_arr_indexer'] = """
Convert an array-like indexer to the appropriate dtype.

Expand All @@ -1354,6 +1375,7 @@ def is_int(v):

@Appender(_index_shared_docs['_convert_arr_indexer'])
def _convert_arr_indexer(self, keyarr):
    # The base implementation only runs the key through
    # _asarray_tuplesafe and returns the result as-is.
    return _asarray_tuplesafe(keyarr)

_index_shared_docs['_convert_index_indexer'] = """
Expand All @@ -1373,6 +1395,21 @@ def _convert_arr_indexer(self, keyarr):
def _convert_index_indexer(self, keyarr):
return keyarr

# Shared docstring text for the `_convert_list_indexer` implementations;
# it is attached to them with @Appender(_index_shared_docs[...]).
_index_shared_docs['_convert_list_indexer'] = """
Convert a list-like indexer to the appropriate dtype.

Parameters
----------
keyarr : Index (or sub-class)
Indexer to convert.
kind : iloc, ix, loc, optional

Returns
-------
positional indexer or None
"""

@Appender(_index_shared_docs['_convert_list_indexer'])
def _convert_list_indexer(self, keyarr, kind=None):
"""
passed a key that is tuplesafe that is integer based
Expand Down
19 changes: 14 additions & 5 deletions pandas/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import pandas.core.base as base
import pandas.core.missing as missing
import pandas.indexes.base as ibase
from pandas.core.common import _asarray_tuplesafe

_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(dict(target_klass='CategoricalIndex'))

Expand Down Expand Up @@ -458,19 +460,26 @@ def get_indexer_non_unique(self, target):
codes = self.categories.get_indexer(target)
return self._engine.get_indexer_non_unique(codes)

@Appender(_index_shared_docs['_convert_list_indexer'])
def _convert_list_indexer(self, keyarr, kind=None):
    """
    Validate a list indexer against the categories.

    Raises KeyError if any value in the key is not found in the
    categories; otherwise returns None.
    """
    # get_indexer results equal to -1 are treated as "not a category"
    not_found = self.categories.get_indexer(keyarr) == -1
    if not_found.any():
        raise KeyError("a list-indexer must only include values that are "
                       "in the categories")

    # Validation only: no indexer is computed here.
    return None

@Appender(_index_shared_docs['_convert_arr_indexer'])
def _convert_arr_indexer(self, keyarr):
    # Make the key tuple-safe first, then hand it to _shallow_copy and
    # return whatever that produces.
    tuplesafe_keys = _asarray_tuplesafe(keyarr)
    return self._shallow_copy(tuplesafe_keys)

@Appender(_index_shared_docs['_convert_index_indexer'])
def _convert_index_indexer(self, keyarr):
# Passes the Index key to self._shallow_copy and returns the result.
# NOTE(review): presumably this re-wraps the key as an index like self -- confirm.
return self._shallow_copy(keyarr)

@Appender(_index_shared_docs['take'] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True,
fill_value=None, **kwargs):
Expand Down
33 changes: 33 additions & 0 deletions pandas/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1568,6 +1568,39 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True):

return new_index, indexer

def _convert_listlike_indexer(self, keyarr, kind=None):
    """
    Coerce a list-like key and try to compute an indexer for it.

    When the parent implementation yields no indexer and the keys are
    non-empty and not tuples, the keys are looked up on level 0.

    Parameters
    ----------
    keyarr : list-like
        Indexer to convert.
    kind : optional
        Forwarded unchanged to the parent implementation.

    Returns
    -------
    tuple (indexer, keyarr)
        indexer is an ndarray or None if cannot convert
        keyarr are tuple-safe keys

    Raises
    ------
    KeyError
        If a level-0 lookup is done and some key is not in level 0.
    """
    indexer, keyarr = super(MultiIndex, self)._convert_listlike_indexer(
        keyarr, kind=kind)

    # Nothing more to do if the parent produced an indexer or the key
    # is empty.
    if indexer is not None or not len(keyarr):
        return indexer, keyarr

    # Tuple keys address full entries rather than a single level.
    if isinstance(keyarr[0], tuple):
        return indexer, keyarr

    # are we indexing a specific level
    _, indexer = self.reindex(keyarr, level=0)

    # take all
    if indexer is None:
        indexer = np.arange(len(self))

    # Keys whose level-0 lookup gives -1 are reported as missing.
    missing = self.levels[0].get_indexer(keyarr) == -1
    if missing.any():
        raise KeyError('%s not in index' % keyarr[missing])

    return indexer, keyarr

@Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
method = missing.clean_reindex_fill_method(method)
Expand Down
1 change: 1 addition & 0 deletions pandas/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def _convert_arr_indexer(self, keyarr):
# Cast the indexer to uint64 if possible so
# that the values returned from indexing are
# also uint64.
keyarr = _asarray_tuplesafe(keyarr)
if is_integer_dtype(keyarr):
return _asarray_tuplesafe(keyarr, dtype=np.uint64)
return keyarr
Expand Down