Skip to content

Make get_loc with nan for FloatIndex consistent with other index types #39382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Feb 3, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pandas/_libs/index_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ cdef class {{name}}Engine(IndexEngine):
with warnings.catch_warnings():
# e.g. if values is float64 and `val` is a str, suppress warning
warnings.filterwarnings("ignore", category=FutureWarning)
indexer = values == val
if val != val:
indexer = values != values
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

something like

{{ if dtype in ["float64", "float32"]}}
if val != val:
    indexer = np.isnan(values)
else:
    indexer = values == val
{{else}}
indexer = values == val
{{endif}}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did the same thing yesterday, but it caused the test to fail. Weird...

else:
indexer = values == val
except TypeError:
# if the equality above returns a bool, cython will raise TypeError
# when trying to cast it to ndarray
Expand Down
8 changes: 0 additions & 8 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2286,14 +2286,6 @@ def _isnan(self):
values.fill(False)
return values

@cache_readonly
@final
def _nan_idxs(self):
if self._can_hold_na:
return self._isnan.nonzero()[0]
else:
return np.array([], dtype=np.intp)

@cache_readonly
def hasnans(self) -> bool:
"""
Expand Down
9 changes: 0 additions & 9 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,15 +380,6 @@ def get_loc(self, key, method=None, tolerance=None):
if is_bool(key):
# Catch this to avoid accidentally casting to 1.0
raise KeyError(key)

if is_float(key) and np.isnan(key):
nan_idxs = self._nan_idxs
if not len(nan_idxs):
raise KeyError(key)
elif len(nan_idxs) == 1:
return nan_idxs[0]
return nan_idxs

return super().get_loc(key, method=method, tolerance=tolerance)

# ----------------------------------------------------------------
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/indexes/datetimes/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,16 +316,13 @@ def test_nat(self, tz_naive_fixture):
idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
assert idx._can_hold_na

tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
assert idx.hasnans is False
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
assert idx._can_hold_na

tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
assert idx.hasnans is True
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

@pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
@pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
Expand Down
16 changes: 10 additions & 6 deletions pandas/tests/indexes/numeric/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,10 @@ def test_get_loc_na(self):
idx = Float64Index([np.nan, 1, np.nan])
assert idx.get_loc(1) == 1

# FIXME: don't leave commented-out
# representable by slice [0:2:2]
# pytest.raises(KeyError, idx.slice_locs, np.nan)
sliced = idx.slice_locs(np.nan)
assert isinstance(sliced, tuple)
assert sliced == (0, 3)

msg = "'Cannot get left slice bound for non-unique label: nan'"
with pytest.raises(KeyError, match=msg):
idx.slice_locs(np.nan)
# not representable by slice
idx = Float64Index([np.nan, 1, np.nan, np.nan])
assert idx.get_loc(1) == 1
Expand All @@ -80,6 +77,13 @@ def test_get_loc_missing_nan(self):
# listlike/non-hashable raises TypeError
idx.get_loc([np.nan])

@pytest.mark.parametrize("method", ["nearest", "pad"])
def test_get_loc_float_index_nan_with_method(self, method):
# GH#39382
idx = Index([1.0, 2.0, 3.0])
result = idx.get_loc(np.nan, method=method)
assert result == 2


class TestGetIndexer:
@pytest.mark.parametrize("method", ["pad", "backfill", "nearest"])
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/indexes/period/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,14 +278,12 @@ def test_nat(self):

tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
assert idx.hasnans is False
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

idx = PeriodIndex(["2011-01-01", "NaT"], freq="D")
assert idx._can_hold_na

tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
assert idx.hasnans is True
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

def test_freq_setter_deprecated(self):
# GH 20678
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/indexes/timedeltas/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,12 @@ def test_nat(self):

tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
assert idx.hasnans is False
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

idx = TimedeltaIndex(["1 days", "NaT"])
assert idx._can_hold_na

tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
assert idx.hasnans is True
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

@pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
@pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
Expand Down