Skip to content

Commit 6cf8203

Browse files
committed
deprecate categories and ordered parameters
1 parent d86e754 commit 6cf8203

File tree

16 files changed

+138
-155
lines changed

16 files changed

+138
-155
lines changed

doc/source/whatsnew/v0.24.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,6 +1283,7 @@ Deprecations
12831283
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
12841284
- The signature of :meth:`Series.to_csv` has been made uniform with that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, and the ``header`` argument now defaults to ``True``. (:issue:`19715`)
12851285
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
1286+
- :meth:`Categorical.from_codes` has deprecated the ``categories`` and ``ordered`` parameters. Supply a :class:`~pandas.api.types.CategoricalDtype` to the new ``dtype`` parameter instead. (:issue:`24398`)
12861287
- :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)
12871288
- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain
12881289
many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`)

pandas/core/arrays/categorical.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -605,9 +605,9 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
605605
@classmethod
606606
def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
607607
"""
608-
Make a Categorical type from codes and categories arrays.
608+
Make a Categorical type from codes and a CategoricalDtype.
609609
610-
This constructor is useful if you already have codes and categories and
610+
This constructor is useful if you already have codes and the dtype and
611611
so do not need the (computation intensive) factorization step, which is
612612
usually done on the constructor.
613613
@@ -621,19 +621,21 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
621621
categories or -1 for NaN
622622
categories : index-like, optional
623623
The categories for the categorical. Items need to be unique.
624+
625+
.. deprecated:: 0.24.0
626+
Use ``dtype`` instead.
624627
ordered : bool, optional
625628
Whether or not this categorical is treated as an ordered
626629
categorical. If not given, the resulting categorical will be
627630
unordered.
628631
629-
.. versionchanged:: 0.24.0
630-
631-
The default value has been changed to ``None``. Previously
632-
the default value was ``False``.
633-
dtype : CategoricalDtype, optional
632+
.. deprecated:: 0.24.0
633+
Use ``dtype`` instead.
634+
dtype : CategoricalDtype
634635
An instance of ``CategoricalDtype`` to use for this categorical.
635636
636637
.. versionadded:: 0.24.0
638+
``dtype`` will be required in a future version.
637639
638640
Examples
639641
--------
@@ -642,8 +644,18 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
642644
[a, b, a, b]
643645
Categories (2, object): [a < b]
644646
"""
645-
dtype = CategoricalDtype._from_values_or_dtype(codes, categories,
646-
ordered, dtype)
647+
if dtype is not None:
648+
if categories is not None or ordered is not None:
649+
raise ValueError("Cannot specify `categories` or `ordered` "
650+
"together with `dtype`.")
651+
elif categories is None and dtype is None:
652+
raise ValueError("Must specify `dtype`.")
653+
else:
654+
msg = u("The 'categories' and 'ordered' keyword are deprecated "
655+
"and will be removed in a future version. Please use "
656+
"'dtype' instead.")
657+
warn(msg, FutureWarning, stacklevel=2)
658+
dtype = CategoricalDtype(categories, ordered)
647659

648660
codes = np.asarray(codes) # #21767
649661
if not is_integer_dtype(codes):
@@ -1211,9 +1223,8 @@ def map(self, mapper):
12111223
"""
12121224
new_categories = self.categories.map(mapper)
12131225
try:
1214-
return self.from_codes(self._codes.copy(),
1215-
categories=new_categories,
1216-
ordered=self.ordered)
1226+
new_dtype = CategoricalDtype(new_categories, ordered=self.ordered)
1227+
return self.from_codes(self._codes.copy(), dtype=new_dtype)
12171228
except ValueError:
12181229
# NA values are represented in self._codes with -1
12191230
# np.take causes NA values to take final element in new_categories

pandas/core/groupby/grouper.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pandas.core.dtypes.common import (
1515
ensure_categorical, is_categorical_dtype, is_datetime64_dtype, is_hashable,
1616
is_list_like, is_scalar, is_timedelta64_dtype)
17+
from pandas.core.dtypes.dtypes import CategoricalDtype
1718
from pandas.core.dtypes.generic import ABCSeries
1819

1920
import pandas.core.algorithms as algorithms
@@ -292,21 +293,19 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
292293
from pandas.core.groupby.categorical import recode_for_groupby
293294
self.grouper, self.all_grouper = recode_for_groupby(
294295
self.grouper, self.sort, observed)
295-
categories = self.grouper.categories
296+
dtype = CategoricalDtype(self.grouper.categories,
297+
ordered=self.grouper.ordered)
296298

297299
# we make a CategoricalIndex out of the cat grouper
298300
# preserving the categories / ordered attributes
299301
self._labels = self.grouper.codes
300302
if observed:
301303
codes = algorithms.unique1d(self.grouper.codes)
302304
else:
303-
codes = np.arange(len(categories))
305+
codes = np.arange(len(dtype.categories))
304306

305307
self._group_index = CategoricalIndex(
306-
Categorical.from_codes(
307-
codes=codes,
308-
categories=categories,
309-
ordered=self.grouper.ordered))
308+
Categorical.from_codes(codes=codes, dtype=dtype))
310309

311310
# we are done
312311
if isinstance(self.grouper, Grouping):
@@ -395,8 +394,8 @@ def _make_labels(self):
395394

396395
@cache_readonly
397396
def groups(self):
398-
return self.index.groupby(Categorical.from_codes(self.labels,
399-
self.group_index))
397+
return self.index.groupby(
398+
Categorical(self.labels, self.group_index, fastpath=True))
400399

401400

402401
def _get_grouper(obj, key=None, axis=0, level=None, sort=True,

pandas/core/indexes/multi.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
ensure_int64, ensure_platform_int, is_categorical_dtype, is_hashable,
1919
is_integer, is_iterator, is_list_like, is_object_dtype, is_scalar,
2020
pandas_dtype)
21-
from pandas.core.dtypes.dtypes import ExtensionDtype, PandasExtensionDtype
21+
from pandas.core.dtypes.dtypes import (
22+
CategoricalDtype, ExtensionDtype, PandasExtensionDtype)
2223
from pandas.core.dtypes.generic import ABCDataFrame
2324
from pandas.core.dtypes.missing import array_equivalent, isna
2425

@@ -2026,13 +2027,14 @@ def _get_codes_for_sorting(self):
20262027
"""
20272028
from pandas.core.arrays import Categorical
20282029

2029-
def cats(level_codes):
2030-
return np.arange(np.array(level_codes).max() + 1 if
2030+
def as_dtype(level_codes):
2031+
cats = np.arange(np.array(level_codes).max() + 1 if
20312032
len(level_codes) else 0,
20322033
dtype=level_codes.dtype)
2034+
return CategoricalDtype(cats, ordered=True)
20332035

2034-
return [Categorical.from_codes(level_codes, cats(level_codes),
2035-
ordered=True)
2036+
return [Categorical.from_codes(level_codes,
2037+
dtype=as_dtype(level_codes))
20362038
for level_codes in self.codes]
20372039

20382040
def sortlevel(self, level=0, ascending=True, sort_remaining=True):

pandas/io/packers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
from pandas.core.dtypes.common import (
5656
is_categorical_dtype, is_datetime64tz_dtype, is_object_dtype,
5757
needs_i8_conversion, pandas_dtype)
58+
from pandas.core.dtypes.dtypes import CategoricalDtype as CDT
5859

5960
from pandas import ( # noqa:F401
6061
Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Float64Index,
@@ -621,9 +622,8 @@ def decode(obj):
621622
name=obj[u'name'])
622623
elif typ == u'category':
623624
from_codes = globals()[obj[u'klass']].from_codes
624-
return from_codes(codes=obj[u'codes'],
625-
categories=obj[u'categories'],
626-
ordered=obj[u'ordered'])
625+
dtype = CDT(obj[u'categories'], ordered=obj[u'ordered'])
626+
return from_codes(codes=obj[u'codes'], dtype=dtype)
627627

628628
elif typ == u'interval':
629629
return Interval(obj[u'left'], obj[u'right'], obj[u'closed'])

pandas/io/pytables.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
ensure_int64, ensure_object, ensure_platform_int, is_categorical_dtype,
2525
is_datetime64_dtype, is_datetime64tz_dtype, is_list_like,
2626
is_timedelta64_dtype)
27+
from pandas.core.dtypes.dtypes import CategoricalDtype
2728
from pandas.core.dtypes.missing import array_equivalent
2829

2930
from pandas import (
@@ -2206,10 +2207,8 @@ def convert(self, values, nan_rep, encoding, errors):
22062207
categories = categories[~mask]
22072208
codes[codes != -1] -= mask.astype(int).cumsum().values
22082209

2209-
self.data = Categorical.from_codes(codes,
2210-
categories=categories,
2211-
ordered=self.ordered)
2212-
2210+
dtype = CategoricalDtype(categories, ordered=self.ordered)
2211+
self.data = Categorical.from_codes(codes, dtype=dtype)
22132212
else:
22142213

22152214
try:

pandas/tests/arrays/categorical/test_constructors.py

Lines changed: 25 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,13 @@ class TestCategoricalConstructors(object):
2121
def test_validate_ordered(self):
2222
# see gh-14058
2323
exp_msg = "'ordered' must either be 'True' or 'False'"
24-
exp_err = TypeError
2524

26-
# This should be a boolean.
25+
# This should be a boolean or None.
2726
ordered = np.array([0, 1, 2])
2827

29-
with pytest.raises(exp_err, match=exp_msg):
28+
with pytest.raises(TypeError, match=exp_msg):
3029
Categorical([1, 2, 3], ordered=ordered)
3130

32-
with pytest.raises(exp_err, match=exp_msg):
33-
Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'],
34-
ordered=ordered)
35-
3631
def test_constructor_empty(self):
3732
# GH 17248
3833
c = Categorical([])
@@ -421,76 +416,41 @@ def test_constructor_with_categorical_categories(self):
421416
tm.assert_categorical_equal(result, expected)
422417

423418
def test_from_codes(self):
419+
dtype = CategoricalDtype(categories=[1, 2])
420+
421+
# no dtype or categories
422+
msg = 'Must specify `dtype`.'
423+
with pytest.raises(ValueError, match=msg):
424+
Categorical.from_codes([1, 2])
424425

425426
# too few categories
426-
dtype = CategoricalDtype(categories=[1, 2])
427427
msg = "codes need to be between "
428-
with pytest.raises(ValueError, match=msg):
429-
Categorical.from_codes([1, 2], categories=dtype.categories)
430428
with pytest.raises(ValueError, match=msg):
431429
Categorical.from_codes([1, 2], dtype=dtype)
432430

433431
# no int codes
434432
msg = "codes need to be array-like integers"
435-
with pytest.raises(ValueError, match=msg):
436-
Categorical.from_codes(["a"], categories=dtype.categories)
437433
with pytest.raises(ValueError, match=msg):
438434
Categorical.from_codes(["a"], dtype=dtype)
439435

440-
# no unique categories
441-
with pytest.raises(ValueError,
442-
match="Categorical categories must be unique"):
443-
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])
444-
445-
# NaN categories included
446-
with pytest.raises(ValueError,
447-
match="Categorial categories cannot be null"):
448-
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])
449-
450436
# too negative
451437
dtype = CategoricalDtype(categories=["a", "b", "c"])
452438
msg = r"codes need to be between -1 and len\(categories\)-1"
453-
with pytest.raises(ValueError, match=msg):
454-
Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
455439
with pytest.raises(ValueError, match=msg):
456440
Categorical.from_codes([-2, 1, 2], dtype=dtype)
457441

458442
exp = Categorical(["a", "b", "c"], ordered=False)
459-
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
460-
tm.assert_categorical_equal(exp, res)
461-
462443
res = Categorical.from_codes([0, 1, 2], dtype=dtype)
463444
tm.assert_categorical_equal(exp, res)
464445

465446
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
466447
dtype = CategoricalDtype(categories=["train", "test"])
467-
Categorical.from_codes(codes, categories=dtype.categories)
468448
Categorical.from_codes(codes, dtype=dtype)
469449

470-
def test_from_codes_with_categorical_categories(self):
471-
# GH17884
472-
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
473-
474-
result = Categorical.from_codes(
475-
[0, 1], categories=Categorical(['a', 'b', 'c']))
476-
tm.assert_categorical_equal(result, expected)
477-
478-
result = Categorical.from_codes(
479-
[0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
480-
tm.assert_categorical_equal(result, expected)
481-
482-
# non-unique Categorical still raises
483-
with pytest.raises(ValueError,
484-
match="Categorical categories must be unique"):
485-
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))
486-
487450
def test_from_codes_with_nan_code(self):
488451
# GH21767
489452
codes = [1, 2, np.nan]
490453
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
491-
with pytest.raises(ValueError,
492-
match="codes need to be array-like integers"):
493-
Categorical.from_codes(codes, categories=dtype.categories)
494454
with pytest.raises(ValueError,
495455
match="codes need to be array-like integers"):
496456
Categorical.from_codes(codes, dtype=dtype)
@@ -500,36 +460,43 @@ def test_from_codes_with_float(self):
500460
codes = [1.0, 2.0, 0] # integer, but in float dtype
501461
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
502462

503-
with tm.assert_produces_warning(FutureWarning):
504-
cat = Categorical.from_codes(codes, dtype.categories)
505-
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
506-
507-
with tm.assert_produces_warning(FutureWarning):
463+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
508464
cat = Categorical.from_codes(codes, dtype=dtype)
509465
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
510466

511467
codes = [1.1, 2.0, 0] # non-integer
512-
with pytest.raises(ValueError,
513-
match="codes need to be array-like integers"):
514-
Categorical.from_codes(codes, dtype.categories)
515468
with pytest.raises(ValueError,
516469
match="codes need to be array-like integers"):
517470
Categorical.from_codes(codes, dtype=dtype)
518471

472+
def test_from_codes_deprecated(self, ordered):
473+
# GH24398
474+
cats = ['a', 'b']
475+
with tm.assert_produces_warning(FutureWarning):
476+
Categorical.from_codes([0, 1], categories=cats)
477+
478+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
479+
Categorical.from_codes([0, 1], categories=cats, ordered=True)
480+
481+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
482+
Categorical.from_codes([0, 1], categories=cats, ordered=False)
483+
519484
@pytest.mark.parametrize('dtype', [None, 'category'])
520485
def test_from_inferred_categories(self, dtype):
521486
cats = ['a', 'b']
522487
codes = np.array([0, 0, 1, 1], dtype='i8')
523488
result = Categorical._from_inferred_categories(cats, codes, dtype)
524-
expected = Categorical.from_codes(codes, cats)
489+
expected = Categorical.from_codes(codes,
490+
dtype=CategoricalDtype(cats))
525491
tm.assert_categorical_equal(result, expected)
526492

527493
@pytest.mark.parametrize('dtype', [None, 'category'])
528494
def test_from_inferred_categories_sorts(self, dtype):
529495
cats = ['b', 'a']
530496
codes = np.array([0, 1, 1, 1], dtype='i8')
531497
result = Categorical._from_inferred_categories(cats, codes, dtype)
532-
expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
498+
expected = Categorical.from_codes([1, 0, 0, 0],
499+
dtype=CategoricalDtype(['a', 'b']))
533500
tm.assert_categorical_equal(result, expected)
534501

535502
def test_from_inferred_categories_dtype(self):
Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,29 @@
11
# -*- coding: utf-8 -*-
22

33
from pandas import Categorical
4+
from pandas.api.types import CategoricalDtype
45
import pandas.util.testing as tm
56

67

78
class TestCategoricalSubclassing(object):
89

910
def test_constructor(self):
10-
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
11-
assert isinstance(sc, tm.SubclassedCategorical)
12-
tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c']))
11+
subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
12+
assert isinstance(subclassed, tm.SubclassedCategorical)
13+
tm.assert_categorical_equal(subclassed, Categorical(['a', 'b', 'c']))
1314

1415
def test_from_codes(self):
15-
sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
16-
assert isinstance(sc, tm.SubclassedCategorical)
17-
exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
18-
tm.assert_categorical_equal(sc, exp)
16+
dtype = CategoricalDtype(['a', 'b', 'c'])
17+
subclassed = tm.SubclassedCategorical.from_codes([1, 0, 2],
18+
dtype=dtype)
19+
assert isinstance(subclassed, tm.SubclassedCategorical)
20+
21+
expected = Categorical.from_codes([1, 0, 2], dtype=dtype)
22+
tm.assert_categorical_equal(subclassed, expected)
1923

2024
def test_map(self):
21-
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
22-
res = sc.map(lambda x: x.upper())
23-
assert isinstance(res, tm.SubclassedCategorical)
24-
exp = Categorical(['A', 'B', 'C'])
25-
tm.assert_categorical_equal(res, exp)
25+
subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
26+
result = subclassed.map(lambda x: x.upper())
27+
assert isinstance(result, tm.SubclassedCategorical)
28+
expected = Categorical(['A', 'B', 'C'])
29+
tm.assert_categorical_equal(result, expected)

0 commit comments

Comments
 (0)