Skip to content

Commit d92226f

Browse files
committed
Review comments for dummies implementation
1 parent 09772ac commit d92226f

File tree

1 file changed

+18
-21
lines changed

1 file changed

+18
-21
lines changed

pandas/core/arrays/categorical.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import operator
22
from shutil import get_terminal_size
3-
from typing import TYPE_CHECKING, Dict, Hashable, List, Type, Union, cast
3+
from typing import TYPE_CHECKING, Dict, Hashable, List, Optional, Type, Union, cast
44
from warnings import warn
55

66
import numpy as np
@@ -383,26 +383,27 @@ def __init__(
383383
self._codes = coerce_indexer_dtype(codes, dtype.categories)
384384

385385
@classmethod
386-
def from_dummies(cls, dummies: "DataFrame", ordered=None):
387-
"""
388-
Create a `Categorical` using a ``DataFrame`` encoding those categories
389-
as dummy/ one-hot encoded variables.
386+
def from_dummies(
387+
cls, dummies: "DataFrame", ordered: Optional[bool] = None
388+
) -> "Categorical":
389+
"""Create a `Categorical` using a ``DataFrame`` of dummy variables.
390390
391391
The ``DataFrame`` must be coercible to boolean,
392392
and have no more than one truthy value per row.
393393
The columns of the ``DataFrame`` become the categories of the `Categorical`.
394-
A column whose header is NA will be dropped.
394+
A column whose header is NA will be dropped;
395+
any row with a NA value will be uncategorised.
395396
396397
Parameters
397398
----------
398-
dummies : DataFrame of bool-like
399-
ordered : bool
400-
Whether or not this Categorical is ordered.
399+
dummies : DataFrame of bool-like
400+
ordered : bool, optional
401+
Whether or not this Categorical is ordered.
401402
402403
Raises
403404
------
404-
ValueError
405-
If a sample belongs to >1 category
405+
ValueError
406+
If a sample belongs to >1 category
406407
407408
Returns
408409
-------
@@ -418,34 +419,30 @@ def from_dummies(cls, dummies: "DataFrame", ordered=None):
418419
Categories (3, object): [a, b, c]
419420
"""
420421
# GH 8745
421-
from pandas import Series
422-
423422
df = dummies.drop(columns=np.nan, errors="ignore").astype(bool)
424423

425424
if (df.sum(axis=1) > 1).any():
426425
raise ValueError("Some rows belong to >1 category")
427426

428-
index_into = Series([np.nan] + list(df.columns))
429-
mult_by = np.arange(1, len(index_into))
427+
mult_by = np.arange(1, df.shape[1] + 1)
430428

431429
codes = (df.astype(int) * mult_by).sum(axis=1) - 1
432430
codes[codes.isna()] = -1
433431
return cls.from_codes(codes, df.columns.values, ordered=ordered)
434432

435433
def to_dummies(self, na_column=None) -> "DataFrame":
436434
"""
437-
Create a ``DataFrame`` representing this `Categorical`
438-
as dummy/ one-hot encoded variables.
435+
Create a ``DataFrame`` of boolean dummy variables representing this object.
439436
440437
For more power over column names or to use a sparse matrix,
441438
see :func:`pandas.get_dummies`.
442439
443440
Parameters
444441
----------
445-
na_column : Optional
446-
If None, NA values will be represented as a row of zeros.
447-
Otherwise, this is the name of a new column representing
448-
those NA values.
442+
na_column : Hashable, optional
443+
If None, NA values will be represented as a row of zeros.
444+
Otherwise, this is the name of a new column representing
445+
those NA values.
449446
450447
Returns
451448
-------

0 commit comments

Comments
 (0)