Skip to content

BUG: Support categorical targets in IntervalIndex.get_indexer #30181

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,7 @@ Numeric
- Bug in :class:`NumericIndex` construction that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`)
- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be cast to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`)
- Bug in :meth:`Series.interpolate` where using ``method='index'`` with an unsorted index would return incorrect results (:issue:`21037`)
- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`)

Conversion
^^^^^^^^^^
Expand All @@ -728,7 +729,7 @@ Strings
Interval
^^^^^^^^

-
- Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`)
-

Indexing
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
)
from pandas.core.dtypes.common import (
ensure_platform_int,
is_categorical,
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_dtype_equal,
Expand All @@ -36,6 +37,7 @@
from pandas.core.dtypes.missing import isna

from pandas._typing import AnyArrayLike
from pandas.core.algorithms import take_1d
from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs
import pandas.core.common as com
import pandas.core.indexes.base as ibase
Expand Down Expand Up @@ -958,6 +960,10 @@ def get_indexer(
left_indexer = self.left.get_indexer(target_as_index.left)
right_indexer = self.right.get_indexer(target_as_index.right)
indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
elif is_categorical(target_as_index):
# get an indexer for unique categories then propagate to codes via take_1d
categories_indexer = self.get_indexer(target_as_index.categories)
indexer = take_1d(categories_indexer, target_as_index.codes, fill_value=-1)
elif not is_object_dtype(target_as_index):
# homogeneous scalar index: use IntervalTree
target_as_index = self._maybe_convert_i8(target_as_index)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2272,6 +2272,15 @@ def test_round_nonunique_categorical(self):

tm.assert_frame_equal(result, expected)

def test_round_interval_category_columns(self):
# GH 30063
columns = pd.CategoricalIndex(pd.interval_range(0, 2))
df = DataFrame([[0.66, 1.1], [0.3, 0.25]], columns=columns)

result = df.round()
expected = DataFrame([[1.0, 1.0], [0.0, 0.0]], columns=columns)
tm.assert_frame_equal(result, expected)

# ---------------------------------------------------------------------
# Clip

Expand Down
28 changes: 27 additions & 1 deletion pandas/tests/indexes/interval/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
import numpy as np
import pytest

from pandas import Interval, IntervalIndex, Timedelta, date_range, timedelta_range
from pandas import (
CategoricalIndex,
Interval,
IntervalIndex,
Timedelta,
date_range,
timedelta_range,
)
from pandas.core.indexes.base import InvalidIndexError
import pandas.util.testing as tm

Expand Down Expand Up @@ -231,6 +238,25 @@ def test_get_indexer_length_one_interval(self, size, closed):
expected = np.array([0] * size, dtype="intp")
tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize(
    "target",
    [
        IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]),
        IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]),
        IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"),
        [-1, 0, 0.5, 1, 2, 2.5, np.nan],
        ["foo", "foo", "bar", "baz"],
    ],
)
def test_get_indexer_categorical(self, target, ordered_fixture):
    # GH 30063: wrapping the target in a CategoricalIndex (ordered or not)
    # must give the same indexer as passing the plain target
    interval_index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
    as_categorical = CategoricalIndex(target, ordered=ordered_fixture)

    result = interval_index.get_indexer(as_categorical)

    # the non-categorical lookup is the reference behaviour
    expected = interval_index.get_indexer(target)
    tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize(
"tuples, closed",
[
Expand Down