@@ -451,58 +451,103 @@ def from_dummies(
451
451
codes = ((df * mult_by ).sum (axis = 1 , skipna = False ) - 1 ).astype ("Int64" )
452
452
return cls .from_codes (codes .fillna (- 1 ), df .columns .values , ordered = ordered )
453
453
454
- def to_dummies (self , na_column = None ) -> "DataFrame" :
455
- """
456
- Create a ``DataFrame`` of boolean dummy variables representing this object.
457
-
458
- For more power over column names or to use a sparse matrix,
459
- see :func:`pandas.get_dummies`.
454
+ def get_dummies (
455
+ self ,
456
+ prefix = None ,
457
+ prefix_sep = "_" ,
458
+ dummy_na = False ,
459
+ sparse = False ,
460
+ drop_first = False ,
461
+ dtype = None ,
462
+ ) -> "DataFrame" :
463
+ """
464
+ Convert into dummy/indicator variables.
460
465
461
466
Parameters
462
467
----------
463
- na_column : Optional
464
- If None, NA values will be represented as a row of zeros.
465
- Otherwise, this is the name of a new column representing
466
- those NA values.
468
+ prefix : str, default None
469
+ String to prepend to the DataFrame column names.
470
+ prefix_sep : str, default '_'
471
+ Separator/delimiter to use between the prefix and the column name.
472
+ dummy_na : bool, default False
473
+ Add a column to indicate NaNs; if False, NaNs are ignored.
474
+ sparse : bool, default False
475
+ Whether the dummy-encoded columns should be backed by
476
+ a :class:`SparseArray` (True) or a regular NumPy array (False).
477
+ drop_first : bool, default False
478
+ Whether to get k-1 dummies out of k categorical levels by removing the
479
+ first level.
480
+ dtype : dtype, default np.uint8
481
+ Data type for new columns. Only a single dtype is allowed.
467
482
468
483
Returns
469
484
-------
470
485
DataFrame
471
-
472
- Examples
473
- --------
474
- >>> Categorical(["a", "b", "c"]).to_dummies()
475
- a b c
476
- 0 True False False
477
- 1 False True False
478
- 2 False False True
479
-
480
- >>> Categorical(["a", "b", np.nan]).to_dummies()
481
- a b
482
- 0 True False
483
- 1 False True
484
- 2 False False
485
-
486
- >>> Categorical(["a", "b", np.nan]).to_dummies("other")
487
- a b other
488
- 0 True False False
489
- 1 False True False
490
- 2 False False True
486
+ Dummy-coded data.
491
487
492
488
See Also
493
489
--------
494
- :func:`pandas.get_dummies`
495
- """
496
- from pandas import DataFrame , CategoricalIndex , Series
490
+ Series.str.get_dummies : Convert Series to dummy codes.
491
+ pandas.get_dummies : Convert categorical variable to dummy/indicator variables.
497
492
498
- eye = np .eye (len (self .categories ) + 1 , dtype = bool )
499
- arr = eye [self .codes , :]
500
-
501
- if na_column is None :
502
- return DataFrame (arr [:, :- 1 ], columns = CategoricalIndex (self .categories ))
503
- else :
504
- cats = CategoricalIndex (Series (list (self .categories ) + [na_column ]))
505
- return DataFrame (arr , columns = cats )
493
+ Examples
494
+ --------
495
+ >>> s = pd.Categorical(list('abca'))
496
+
497
+ >>> s.get_dummies()
498
+ a b c
499
+ 0 1 0 0
500
+ 1 0 1 0
501
+ 2 0 0 1
502
+ 3 1 0 0
503
+
504
+ >>> s1 = pd.Categorical(['a', 'b', np.nan])
505
+
506
+ >>> s1.get_dummies()
507
+ a b
508
+ 0 1 0
509
+ 1 0 1
510
+ 2 0 0
511
+
512
+ >>> s1.get_dummies(dummy_na=True)
513
+ a b NaN
514
+ 0 1 0 0
515
+ 1 0 1 0
516
+ 2 0 0 1
517
+
518
+ >>> pd.Categorical(list('abcaa')).get_dummies()
519
+ a b c
520
+ 0 1 0 0
521
+ 1 0 1 0
522
+ 2 0 0 1
523
+ 3 1 0 0
524
+ 4 1 0 0
525
+
526
+ >>> pd.Categorical(list('abcaa')).get_dummies(drop_first=True)
527
+ b c
528
+ 0 0 0
529
+ 1 1 0
530
+ 2 0 1
531
+ 3 0 0
532
+ 4 0 0
533
+
534
+ >>> pd.Categorical(list('abc')).get_dummies(dtype=float)
535
+ a b c
536
+ 0 1.0 0.0 0.0
537
+ 1 0.0 1.0 0.0
538
+ 2 0.0 0.0 1.0
539
+ """
540
+ from pandas import _get_dummies_1d
541
+
542
+ return _get_dummies_1d (
543
+ self ,
544
+ prefix = prefix ,
545
+ prefix_sep = prefix_sep ,
546
+ dummy_na = dummy_na ,
547
+ sparse = sparse ,
548
+ drop_first = drop_first ,
549
+ dtype = dtype ,
550
+ )
506
551
507
552
@property
508
553
def dtype (self ) -> CategoricalDtype :
0 commit comments