1
1
import operator
2
2
from shutil import get_terminal_size
3
- from typing import TYPE_CHECKING , Dict , Hashable , List , Type , Union , cast
3
+ from typing import TYPE_CHECKING , Dict , Hashable , List , Optional , Type , Union , cast
4
4
from warnings import warn
5
5
6
6
import numpy as np
@@ -383,26 +383,27 @@ def __init__(
383
383
self ._codes = coerce_indexer_dtype (codes , dtype .categories )
384
384
385
385
@classmethod
386
- def from_dummies (cls , dummies : "DataFrame" , ordered = None ):
387
- """
388
- Create a ` Categorical` using a ``DataFrame`` encoding those categories
389
- as dummy/ one-hot encoded variables.
386
+ def from_dummies (
387
+ cls , dummies : "DataFrame" , ordered : Optional [ bool ] = None
388
+ ) -> " Categorical" :
389
+ """Create a `Categorical` using a ``DataFrame`` of dummy variables.
390
390
391
391
The ``DataFrame`` must be coercible to boolean,
392
392
and have no more than one truthy value per row.
393
393
The columns of the ``DataFrame`` become the categories of the `Categorical`.
394
- A column whose header is NA will be dropped.
394
+ A column whose header is NA will be dropped;
395
+ any row with an NA value will be uncategorised.
395
396
396
397
Parameters
397
398
----------
398
- dummies : DataFrame of bool-like
399
- ordered : bool
400
- Whether or not this Categorical is ordered.
399
+ dummies : DataFrame of bool-like
400
+ ordered : bool, optional
401
+ Whether or not this Categorical is ordered.
401
402
402
403
Raises
403
404
------
404
- ValueError
405
- If a sample belongs to >1 category
405
+ ValueError
406
+ If a sample belongs to more than one category.
406
407
407
408
Returns
408
409
-------
@@ -418,34 +419,30 @@ def from_dummies(cls, dummies: "DataFrame", ordered=None):
418
419
Categories (3, object): [a, b, c]
419
420
"""
420
421
# GH 8745
421
- from pandas import Series
422
-
423
422
df = dummies .drop (columns = np .nan , errors = "ignore" ).astype (bool )
424
423
425
424
if (df .sum (axis = 1 ) > 1 ).any ():
426
425
raise ValueError ("Some rows belong to >1 category" )
427
426
428
- index_into = Series ([np .nan ] + list (df .columns ))
429
- mult_by = np .arange (1 , len (index_into ))
427
+ mult_by = np .arange (1 , df .shape [1 ] + 1 )
430
428
431
429
codes = (df .astype (int ) * mult_by ).sum (axis = 1 ) - 1
432
430
codes [codes .isna ()] = - 1
433
431
return cls .from_codes (codes , df .columns .values , ordered = ordered )
434
432
435
433
def to_dummies (self , na_column = None ) -> "DataFrame" :
436
434
"""
437
- Create a ``DataFrame`` representing this `Categorical`
438
- as dummy/ one-hot encoded variables.
435
+ Create a ``DataFrame`` of boolean dummy variables representing this object.
439
436
440
437
For more power over column names or to use a sparse matrix,
441
438
see :func:`pandas.get_dummies`.
442
439
443
440
Parameters
444
441
----------
445
- na_column : Optional
446
- If None, NA values will be represented as a row of zeros.
447
- Otherwise, this is the name of a new column representing
448
- those NA values.
442
+ na_column : Optional
443
+ If None, NA values will be represented as a row of zeros.
444
+ Otherwise, this is the name of a new column representing
445
+ those NA values.
449
446
450
447
Returns
451
448
-------
0 commit comments