Skip to content

Commit fccb54d

Browse files
committed
Revert "deprecate categories and ordered parameters"
This reverts commit 6cf8203.
1 parent 6cf8203 commit fccb54d

File tree

16 files changed

+155
-138
lines changed

16 files changed

+155
-138
lines changed

doc/source/whatsnew/v0.24.0.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,6 @@ Deprecations
12831283
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
12841284
- The signature of :meth:`Series.to_csv` has been made uniform with that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, and the ``header`` argument now defaults to ``True``. (:issue:`19715`)
12851285
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
1286-
- :meth:`Categorical.from_codes` has deprecated parameters ``categories`` and ``ordered``. Supply a :class:`~pandas.api.types.CategoricalDtype` to new parameter ``dtype`` instead. (:issue:`24398`)
12871286
- :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)
12881287
- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain
12891288
many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`)

pandas/core/arrays/categorical.py

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -605,9 +605,9 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
605605
@classmethod
606606
def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
607607
"""
608-
Make a Categorical type from codes and CategoricalDtype.
608+
Make a Categorical type from codes and categories arrays.
609609
610-
This constructor is useful if you already have codes and the dtype and
610+
This constructor is useful if you already have codes and categories and
611611
so do not need the (computation intensive) factorization step, which is
612612
usually done on the constructor.
613613
@@ -621,21 +621,19 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
621621
categories or -1 for NaN
622622
categories : index-like, optional
623623
The categories for the categorical. Items need to be unique.
624-
625-
.. deprecated:: 0.24.0
626-
Use ``dtype`` instead.
627624
ordered : bool, optional
628625
Whether or not this categorical is treated as an ordered
629626
categorical. If not given, the resulting categorical will be
630627
unordered.
631628
632-
.. deprecated:: 0.24.0
633-
Use ``dtype`` instead.
634-
dtype : CategoricalDtype
629+
.. versionchanged:: 0.24.0
630+
631+
The default value has been changed to ``None``. Previously
632+
the default value was ``False``.
633+
dtype : CategoricalDtype, optional
635634
An instance of ``CategoricalDtype`` to use for this categorical.
636635
637636
.. versionadded:: 0.24.0
638-
dtype will be required in the future.
639637
640638
Examples
641639
--------
@@ -644,18 +642,8 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
644642
[a, b, a, b]
645643
Categories (2, object): [a < b]
646644
"""
647-
if dtype is not None:
648-
if categories is not None or ordered is not None:
649-
raise ValueError("Cannot specify `categories` or `ordered` "
650-
"together with `dtype`.")
651-
elif categories is None and dtype is None:
652-
raise ValueError("Must specify `dtype`.")
653-
else:
654-
msg = u("The 'categories' and 'ordered' keyword are deprecated "
655-
"and will be removed in a future version. Please use "
656-
"'dtype' instead.")
657-
warn(msg, FutureWarning, stacklevel=2)
658-
dtype = CategoricalDtype(categories, ordered)
645+
dtype = CategoricalDtype._from_values_or_dtype(codes, categories,
646+
ordered, dtype)
659647

660648
codes = np.asarray(codes) # #21767
661649
if not is_integer_dtype(codes):
@@ -1223,8 +1211,9 @@ def map(self, mapper):
12231211
"""
12241212
new_categories = self.categories.map(mapper)
12251213
try:
1226-
new_dtype = CategoricalDtype(new_categories, ordered=self.ordered)
1227-
return self.from_codes(self._codes.copy(), dtype=new_dtype)
1214+
return self.from_codes(self._codes.copy(),
1215+
categories=new_categories,
1216+
ordered=self.ordered)
12281217
except ValueError:
12291218
# NA values are represented in self._codes with -1
12301219
# np.take causes NA values to take final element in new_categories

pandas/core/groupby/grouper.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from pandas.core.dtypes.common import (
1515
ensure_categorical, is_categorical_dtype, is_datetime64_dtype, is_hashable,
1616
is_list_like, is_scalar, is_timedelta64_dtype)
17-
from pandas.core.dtypes.dtypes import CategoricalDtype
1817
from pandas.core.dtypes.generic import ABCSeries
1918

2019
import pandas.core.algorithms as algorithms
@@ -293,19 +292,21 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
293292
from pandas.core.groupby.categorical import recode_for_groupby
294293
self.grouper, self.all_grouper = recode_for_groupby(
295294
self.grouper, self.sort, observed)
296-
dtype = CategoricalDtype(self.grouper.categories,
297-
ordered=self.grouper.ordered)
295+
categories = self.grouper.categories
298296

299297
# we make a CategoricalIndex out of the cat grouper
300298
# preserving the categories / ordered attributes
301299
self._labels = self.grouper.codes
302300
if observed:
303301
codes = algorithms.unique1d(self.grouper.codes)
304302
else:
305-
codes = np.arange(len(dtype.categories))
303+
codes = np.arange(len(categories))
306304

307305
self._group_index = CategoricalIndex(
308-
Categorical.from_codes(codes=codes, dtype=dtype))
306+
Categorical.from_codes(
307+
codes=codes,
308+
categories=categories,
309+
ordered=self.grouper.ordered))
309310

310311
# we are done
311312
if isinstance(self.grouper, Grouping):
@@ -394,8 +395,8 @@ def _make_labels(self):
394395

395396
@cache_readonly
396397
def groups(self):
397-
return self.index.groupby(
398-
Categorical(self.labels, self.group_index, fastpath=True))
398+
return self.index.groupby(Categorical.from_codes(self.labels,
399+
self.group_index))
399400

400401

401402
def _get_grouper(obj, key=None, axis=0, level=None, sort=True,

pandas/core/indexes/multi.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
ensure_int64, ensure_platform_int, is_categorical_dtype, is_hashable,
1919
is_integer, is_iterator, is_list_like, is_object_dtype, is_scalar,
2020
pandas_dtype)
21-
from pandas.core.dtypes.dtypes import (
22-
CategoricalDtype, ExtensionDtype, PandasExtensionDtype)
21+
from pandas.core.dtypes.dtypes import ExtensionDtype, PandasExtensionDtype
2322
from pandas.core.dtypes.generic import ABCDataFrame
2423
from pandas.core.dtypes.missing import array_equivalent, isna
2524

@@ -2027,14 +2026,13 @@ def _get_codes_for_sorting(self):
20272026
"""
20282027
from pandas.core.arrays import Categorical
20292028

2030-
def as_dtype(level_codes):
2031-
cats = np.arange(np.array(level_codes).max() + 1 if
2029+
def cats(level_codes):
2030+
return np.arange(np.array(level_codes).max() + 1 if
20322031
len(level_codes) else 0,
20332032
dtype=level_codes.dtype)
2034-
return CategoricalDtype(cats, ordered=True)
20352033

2036-
return [Categorical.from_codes(level_codes,
2037-
dtype=as_dtype(level_codes))
2034+
return [Categorical.from_codes(level_codes, cats(level_codes),
2035+
ordered=True)
20382036
for level_codes in self.codes]
20392037

20402038
def sortlevel(self, level=0, ascending=True, sort_remaining=True):

pandas/io/packers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@
5555
from pandas.core.dtypes.common import (
5656
is_categorical_dtype, is_datetime64tz_dtype, is_object_dtype,
5757
needs_i8_conversion, pandas_dtype)
58-
from pandas.core.dtypes.dtypes import CategoricalDtype as CDT
5958

6059
from pandas import ( # noqa:F401
6160
Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Float64Index,
@@ -622,8 +621,9 @@ def decode(obj):
622621
name=obj[u'name'])
623622
elif typ == u'category':
624623
from_codes = globals()[obj[u'klass']].from_codes
625-
dtype = CDT(obj[u'categories'], ordered=obj[u'ordered'])
626-
return from_codes(codes=obj[u'codes'], dtype=dtype)
624+
return from_codes(codes=obj[u'codes'],
625+
categories=obj[u'categories'],
626+
ordered=obj[u'ordered'])
627627

628628
elif typ == u'interval':
629629
return Interval(obj[u'left'], obj[u'right'], obj[u'closed'])

pandas/io/pytables.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
ensure_int64, ensure_object, ensure_platform_int, is_categorical_dtype,
2525
is_datetime64_dtype, is_datetime64tz_dtype, is_list_like,
2626
is_timedelta64_dtype)
27-
from pandas.core.dtypes.dtypes import CategoricalDtype
2827
from pandas.core.dtypes.missing import array_equivalent
2928

3029
from pandas import (
@@ -2207,8 +2206,10 @@ def convert(self, values, nan_rep, encoding, errors):
22072206
categories = categories[~mask]
22082207
codes[codes != -1] -= mask.astype(int).cumsum().values
22092208

2210-
dtype = CategoricalDtype(categories, ordered=self.ordered)
2211-
self.data = Categorical.from_codes(codes, dtype=dtype)
2209+
self.data = Categorical.from_codes(codes,
2210+
categories=categories,
2211+
ordered=self.ordered)
2212+
22122213
else:
22132214

22142215
try:

pandas/tests/arrays/categorical/test_constructors.py

Lines changed: 58 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,18 @@ class TestCategoricalConstructors(object):
2121
def test_validate_ordered(self):
2222
# see gh-14058
2323
exp_msg = "'ordered' must either be 'True' or 'False'"
24+
exp_err = TypeError
2425

25-
# This should be a boolean or None.
26+
# This should be a boolean.
2627
ordered = np.array([0, 1, 2])
2728

28-
with pytest.raises(TypeError, match=exp_msg):
29+
with pytest.raises(exp_err, match=exp_msg):
2930
Categorical([1, 2, 3], ordered=ordered)
3031

32+
with pytest.raises(exp_err, match=exp_msg):
33+
Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'],
34+
ordered=ordered)
35+
3136
def test_constructor_empty(self):
3237
# GH 17248
3338
c = Categorical([])
@@ -416,41 +421,76 @@ def test_constructor_with_categorical_categories(self):
416421
tm.assert_categorical_equal(result, expected)
417422

418423
def test_from_codes(self):
419-
dtype = CategoricalDtype(categories=[1, 2])
420-
421-
# no dtype or categories
422-
msg = 'Must specify `dtype`.'
423-
with pytest.raises(ValueError, match=msg):
424-
Categorical.from_codes([1, 2])
425424

426425
# too few categories
426+
dtype = CategoricalDtype(categories=[1, 2])
427427
msg = "codes need to be between "
428+
with pytest.raises(ValueError, match=msg):
429+
Categorical.from_codes([1, 2], categories=dtype.categories)
428430
with pytest.raises(ValueError, match=msg):
429431
Categorical.from_codes([1, 2], dtype=dtype)
430432

431433
# no int codes
432434
msg = "codes need to be array-like integers"
435+
with pytest.raises(ValueError, match=msg):
436+
Categorical.from_codes(["a"], categories=dtype.categories)
433437
with pytest.raises(ValueError, match=msg):
434438
Categorical.from_codes(["a"], dtype=dtype)
435439

440+
# no unique categories
441+
with pytest.raises(ValueError,
442+
match="Categorical categories must be unique"):
443+
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])
444+
445+
# NaN categories included
446+
with pytest.raises(ValueError,
447+
match="Categorial categories cannot be null"):
448+
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])
449+
436450
# too negative
437451
dtype = CategoricalDtype(categories=["a", "b", "c"])
438452
msg = r"codes need to be between -1 and len\(categories\)-1"
453+
with pytest.raises(ValueError, match=msg):
454+
Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
439455
with pytest.raises(ValueError, match=msg):
440456
Categorical.from_codes([-2, 1, 2], dtype=dtype)
441457

442458
exp = Categorical(["a", "b", "c"], ordered=False)
459+
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
460+
tm.assert_categorical_equal(exp, res)
461+
443462
res = Categorical.from_codes([0, 1, 2], dtype=dtype)
444463
tm.assert_categorical_equal(exp, res)
445464

446465
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
447466
dtype = CategoricalDtype(categories=["train", "test"])
467+
Categorical.from_codes(codes, categories=dtype.categories)
448468
Categorical.from_codes(codes, dtype=dtype)
449469

470+
def test_from_codes_with_categorical_categories(self):
471+
# GH17884
472+
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
473+
474+
result = Categorical.from_codes(
475+
[0, 1], categories=Categorical(['a', 'b', 'c']))
476+
tm.assert_categorical_equal(result, expected)
477+
478+
result = Categorical.from_codes(
479+
[0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
480+
tm.assert_categorical_equal(result, expected)
481+
482+
# non-unique Categorical still raises
483+
with pytest.raises(ValueError,
484+
match="Categorical categories must be unique"):
485+
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))
486+
450487
def test_from_codes_with_nan_code(self):
451488
# GH21767
452489
codes = [1, 2, np.nan]
453490
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
491+
with pytest.raises(ValueError,
492+
match="codes need to be array-like integers"):
493+
Categorical.from_codes(codes, categories=dtype.categories)
454494
with pytest.raises(ValueError,
455495
match="codes need to be array-like integers"):
456496
Categorical.from_codes(codes, dtype=dtype)
@@ -460,43 +500,36 @@ def test_from_codes_with_float(self):
460500
codes = [1.0, 2.0, 0] # integer, but in float dtype
461501
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
462502

463-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
503+
with tm.assert_produces_warning(FutureWarning):
504+
cat = Categorical.from_codes(codes, dtype.categories)
505+
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
506+
507+
with tm.assert_produces_warning(FutureWarning):
464508
cat = Categorical.from_codes(codes, dtype=dtype)
465509
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
466510

467511
codes = [1.1, 2.0, 0] # non-integer
512+
with pytest.raises(ValueError,
513+
match="codes need to be array-like integers"):
514+
Categorical.from_codes(codes, dtype.categories)
468515
with pytest.raises(ValueError,
469516
match="codes need to be array-like integers"):
470517
Categorical.from_codes(codes, dtype=dtype)
471518

472-
def test_from_codes_deprecated(self, ordered):
473-
# GH24398
474-
cats = ['a', 'b']
475-
with tm.assert_produces_warning(FutureWarning):
476-
Categorical.from_codes([0, 1], categories=cats)
477-
478-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
479-
Categorical.from_codes([0, 1], categories=cats, ordered=True)
480-
481-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
482-
Categorical.from_codes([0, 1], categories=cats, ordered=False)
483-
484519
@pytest.mark.parametrize('dtype', [None, 'category'])
485520
def test_from_inferred_categories(self, dtype):
486521
cats = ['a', 'b']
487522
codes = np.array([0, 0, 1, 1], dtype='i8')
488523
result = Categorical._from_inferred_categories(cats, codes, dtype)
489-
expected = Categorical.from_codes(codes,
490-
dtype=CategoricalDtype(cats))
524+
expected = Categorical.from_codes(codes, cats)
491525
tm.assert_categorical_equal(result, expected)
492526

493527
@pytest.mark.parametrize('dtype', [None, 'category'])
494528
def test_from_inferred_categories_sorts(self, dtype):
495529
cats = ['b', 'a']
496530
codes = np.array([0, 1, 1, 1], dtype='i8')
497531
result = Categorical._from_inferred_categories(cats, codes, dtype)
498-
expected = Categorical.from_codes([1, 0, 0, 0],
499-
dtype=CategoricalDtype(['a', 'b']))
532+
expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
500533
tm.assert_categorical_equal(result, expected)
501534

502535
def test_from_inferred_categories_dtype(self):
Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,25 @@
11
# -*- coding: utf-8 -*-
22

33
from pandas import Categorical
4-
from pandas.api.types import CategoricalDtype
54
import pandas.util.testing as tm
65

76

87
class TestCategoricalSubclassing(object):
98

109
def test_constructor(self):
11-
subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
12-
assert isinstance(subclassed, tm.SubclassedCategorical)
13-
tm.assert_categorical_equal(subclassed, Categorical(['a', 'b', 'c']))
10+
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
11+
assert isinstance(sc, tm.SubclassedCategorical)
12+
tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c']))
1413

1514
def test_from_codes(self):
16-
dtype = CategoricalDtype(['a', 'b', 'c'])
17-
subclassed = tm.SubclassedCategorical.from_codes([1, 0, 2],
18-
dtype=dtype)
19-
assert isinstance(subclassed, tm.SubclassedCategorical)
20-
21-
expected = Categorical.from_codes([1, 0, 2], dtype=dtype)
22-
tm.assert_categorical_equal(subclassed, expected)
15+
sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
16+
assert isinstance(sc, tm.SubclassedCategorical)
17+
exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
18+
tm.assert_categorical_equal(sc, exp)
2319

2420
def test_map(self):
25-
subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
26-
result = subclassed.map(lambda x: x.upper())
27-
assert isinstance(result, tm.SubclassedCategorical)
28-
expected = Categorical(['A', 'B', 'C'])
29-
tm.assert_categorical_equal(result, expected)
21+
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
22+
res = sc.map(lambda x: x.upper())
23+
assert isinstance(res, tm.SubclassedCategorical)
24+
exp = Categorical(['A', 'B', 'C'])
25+
tm.assert_categorical_equal(res, exp)

0 commit comments

Comments
 (0)