Skip to content

Make get_loc with nan for FloatIndex consistent with other index types #39382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Feb 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ Indexing
- Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`)
- Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`)
- Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrectly casting the datetime64 values to integers (:issue:`39266`)
- Bug in :meth:`Index.get_loc` not raising ``KeyError`` when ``method`` is specified and the key is a ``NaN`` value that is not present in the :class:`Index` (:issue:`39382`)
- Bug in :meth:`Index.insert` (when setting a new column that cannot be held in the existing ``frame.columns``), :meth:`Series.reset_index`, and :meth:`DataFrame.reset_index` incorrectly raising instead of casting to a compatible dtype (:issue:`39068`)
- Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`)

Expand Down
7 changes: 7 additions & 0 deletions pandas/_libs/index_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,14 @@ cdef class {{name}}Engine(IndexEngine):
with warnings.catch_warnings():
# e.g. if values is float64 and `val` is a str, suppress warning
warnings.filterwarnings("ignore", category=FutureWarning)
{{if name in {'Float64', 'Float32'} }}
if util.is_nan(val):
indexer = np.isnan(values)
else:
indexer = values == val
{{else}}
indexer = values == val
{{endif}}
except TypeError:
# if the equality above returns a bool, cython will raise TypeError
# when trying to cast it to ndarray
Expand Down
11 changes: 3 additions & 8 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2292,14 +2292,6 @@ def _isnan(self):
values.fill(False)
return values

@cache_readonly
@final
def _nan_idxs(self):
if self._can_hold_na:
return self._isnan.nonzero()[0]
else:
return np.array([], dtype=np.intp)

@cache_readonly
def hasnans(self) -> bool:
"""
Expand Down Expand Up @@ -3224,6 +3216,9 @@ def get_loc(self, key, method=None, tolerance=None):
except KeyError as err:
raise KeyError(key) from err

if is_scalar(key) and isna(key) and not self.hasnans:
raise KeyError(key)

if tolerance is not None:
tolerance = self._convert_tolerance(tolerance, np.asarray(key))

Expand Down
9 changes: 0 additions & 9 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,15 +341,6 @@ def get_loc(self, key, method=None, tolerance=None):
if is_bool(key):
# Catch this to avoid accidentally casting to 1.0
raise KeyError(key)

if is_float(key) and np.isnan(key):
nan_idxs = self._nan_idxs
if not len(nan_idxs):
raise KeyError(key)
elif len(nan_idxs) == 1:
return nan_idxs[0]
return nan_idxs

return super().get_loc(key, method=method, tolerance=tolerance)

# ----------------------------------------------------------------
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/indexes/datetimes/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,16 +316,13 @@ def test_nat(self, tz_naive_fixture):
idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
assert idx._can_hold_na

tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
assert idx.hasnans is False
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
assert idx._can_hold_na

tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
assert idx.hasnans is True
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

@pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
@pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
Expand Down
27 changes: 20 additions & 7 deletions pandas/tests/indexes/numeric/test_indexing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
import numpy as np
import pytest

from pandas import Float64Index, Index, Int64Index, RangeIndex, Series, UInt64Index
from pandas import (
Float64Index,
Index,
Int64Index,
RangeIndex,
Series,
Timestamp,
UInt64Index,
)
import pandas._testing as tm


Expand Down Expand Up @@ -102,13 +110,10 @@ def test_get_loc_na(self):
idx = Float64Index([np.nan, 1, np.nan])
assert idx.get_loc(1) == 1

# FIXME: dont leave commented-out
# representable by slice [0:2:2]
# pytest.raises(KeyError, idx.slice_locs, np.nan)
sliced = idx.slice_locs(np.nan)
assert isinstance(sliced, tuple)
assert sliced == (0, 3)

msg = "'Cannot get left slice bound for non-unique label: nan'"
with pytest.raises(KeyError, match=msg):
idx.slice_locs(np.nan)
# not representable by slice
idx = Float64Index([np.nan, 1, np.nan, np.nan])
assert idx.get_loc(1) == 1
Expand All @@ -128,6 +133,14 @@ def test_get_loc_missing_nan(self):
# listlike/non-hashable raises TypeError
idx.get_loc([np.nan])

@pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]])
@pytest.mark.parametrize("method", ["nearest", "pad", "backfill"])
def test_get_loc_float_index_nan_with_method(self, vals, method):
# GH#39382
idx = Index(vals)
with pytest.raises(KeyError, match="nan"):
idx.get_loc(np.nan, method=method)


class TestGetIndexer:
def test_get_indexer(self):
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/indexes/period/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,14 +278,12 @@ def test_nat(self):

tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
assert idx.hasnans is False
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

idx = PeriodIndex(["2011-01-01", "NaT"], freq="D")
assert idx._can_hold_na

tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
assert idx.hasnans is True
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

def test_freq_setter_deprecated(self):
# GH 20678
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/indexes/timedeltas/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,12 @@ def test_nat(self):

tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
assert idx.hasnans is False
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

idx = TimedeltaIndex(["1 days", "NaT"])
assert idx._can_hold_na

tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
assert idx.hasnans is True
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

@pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
@pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
Expand Down