@@ -499,10 +499,8 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
499
499
500
500
def copy (self ):
501
501
""" Copy constructor. """
502
- return self ._constructor (values = self ._codes .copy (),
503
- categories = self .categories ,
504
- ordered = self .ordered ,
505
- fastpath = True )
502
+ codes = self ._codes .copy ()
503
+ return self ._constructor (values = codes , dtype = self .dtype , fastpath = True )
506
504
507
505
def astype (self , dtype , copy = True ):
508
506
"""
@@ -635,6 +633,7 @@ def from_codes(cls, codes, categories, ordered=False):
635
633
categorical. If not given, the resulting categorical will be
636
634
unordered.
637
635
"""
636
+ dtype = CategoricalDtype (categories , ordered = ordered )
638
637
codes = np .asarray (codes ) # #21767
639
638
if not is_integer_dtype (codes ):
640
639
msg = "codes need to be array-like integers"
@@ -649,19 +648,17 @@ def from_codes(cls, codes, categories, ordered=False):
649
648
raise ValueError (msg )
650
649
651
650
try :
652
- codes = coerce_indexer_dtype (codes , categories )
651
+ codes = coerce_indexer_dtype (codes , dtype . categories )
653
652
except (ValueError , TypeError ):
654
653
raise ValueError (
655
654
"codes need to be convertible to an arrays of integers" )
656
655
657
- categories = CategoricalDtype .validate_categories (categories )
658
-
659
- if len (codes ) and (codes .max () >= len (categories ) or codes .min () < - 1 ):
656
+ if len (codes ) and (codes .max () >= len (dtype .categories ) or
657
+ codes .min () < - 1 ):
660
658
raise ValueError ("codes need to be between -1 and "
661
659
"len(categories)-1" )
662
660
663
- return cls (codes , categories = categories , ordered = ordered ,
664
- fastpath = True )
661
+ return cls (codes , dtype = dtype , fastpath = True )
665
662
666
663
_codes = None
667
664
@@ -1632,8 +1629,8 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'):
1632
1629
self ._codes = codes
1633
1630
return
1634
1631
else :
1635
- return self ._constructor (values = codes , categories = self .categories ,
1636
- ordered = self . ordered , fastpath = True )
1632
+ return self ._constructor (values = codes , dtype = self .dtype ,
1633
+ fastpath = True )
1637
1634
1638
1635
def _values_for_rank (self ):
1639
1636
"""
@@ -1737,15 +1734,15 @@ def fillna(self, value=None, method=None, limit=None):
1737
1734
raise NotImplementedError ("specifying a limit for fillna has not "
1738
1735
"been implemented yet" )
1739
1736
1740
- values = self ._codes
1737
+ codes = self ._codes
1741
1738
1742
1739
# pad / bfill
1743
1740
if method is not None :
1744
1741
1745
- values = self .to_dense ().reshape (- 1 , len (self ))
1746
- values = interpolate_2d (values , method , 0 , None ,
1747
- value ).astype (self .categories .dtype )[0 ]
1748
- values = _get_codes_for_values (values , self .categories )
1742
+ arr = self .to_dense ().reshape (- 1 , len (self ))
1743
+ arr = interpolate_2d (arr , method , 0 , None ,
1744
+ value ).astype (self .categories .dtype )[0 ]
1745
+ codes = _get_codes_for_values (arr , self .categories )
1749
1746
1750
1747
else :
1751
1748
@@ -1757,28 +1754,26 @@ def fillna(self, value=None, method=None, limit=None):
1757
1754
1758
1755
values_codes = _get_codes_for_values (value , self .categories )
1759
1756
indexer = np .where (values_codes != - 1 )
1760
- values [indexer ] = values_codes [values_codes != - 1 ]
1757
+ codes [indexer ] = values_codes [values_codes != - 1 ]
1761
1758
1762
1759
# If value is not a dict or Series it should be a scalar
1763
1760
elif is_hashable (value ):
1764
1761
if not isna (value ) and value not in self .categories :
1765
1762
raise ValueError ("fill value must be in categories" )
1766
1763
1767
- mask = values == - 1
1764
+ mask = codes == - 1
1768
1765
if mask .any ():
1769
- values = values .copy ()
1766
+ codes = codes .copy ()
1770
1767
if isna (value ):
1771
- values [mask ] = - 1
1768
+ codes [mask ] = - 1
1772
1769
else :
1773
- values [mask ] = self .categories .get_loc (value )
1770
+ codes [mask ] = self .categories .get_loc (value )
1774
1771
1775
1772
else :
1776
1773
raise TypeError ('"value" parameter must be a scalar, dict '
1777
1774
'or Series, but you passed a '
1778
1775
'"{0}"' .format (type (value ).__name__ ))
1779
-
1780
- return self ._constructor (values , categories = self .categories ,
1781
- ordered = self .ordered , fastpath = True )
1776
+ return self ._constructor (codes , dtype = self .dtype , fastpath = True )
1782
1777
1783
1778
def take_nd (self , indexer , allow_fill = None , fill_value = None ):
1784
1779
"""
@@ -1823,8 +1818,7 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None):
1823
1818
1824
1819
codes = take (self ._codes , indexer , allow_fill = allow_fill ,
1825
1820
fill_value = fill_value )
1826
- result = self ._constructor (codes , categories = self .categories ,
1827
- ordered = self .ordered , fastpath = True )
1821
+ result = self ._constructor (codes , dtype = self .dtype , fastpath = True )
1828
1822
return result
1829
1823
1830
1824
take = take_nd
@@ -1843,9 +1837,8 @@ def _slice(self, slicer):
1843
1837
"categorical" )
1844
1838
slicer = slicer [1 ]
1845
1839
1846
- _codes = self ._codes [slicer ]
1847
- return self ._constructor (values = _codes , categories = self .categories ,
1848
- ordered = self .ordered , fastpath = True )
1840
+ codes = self ._codes [slicer ]
1841
+ return self ._constructor (values = codes , dtype = self .dtype , fastpath = True )
1849
1842
1850
1843
def __len__ (self ):
1851
1844
"""The length of this Categorical."""
@@ -2152,14 +2145,12 @@ def mode(self, dropna=True):
2152
2145
"""
2153
2146
2154
2147
import pandas ._libs .hashtable as htable
2155
- values = self ._codes
2148
+ codes = self ._codes
2156
2149
if dropna :
2157
2150
good = self ._codes != - 1
2158
- values = self ._codes [good ]
2159
- values = sorted (htable .mode_int64 (ensure_int64 (values ), dropna ))
2160
- result = self ._constructor (values = values , categories = self .categories ,
2161
- ordered = self .ordered , fastpath = True )
2162
- return result
2151
+ codes = self ._codes [good ]
2152
+ codes = sorted (htable .mode_int64 (ensure_int64 (codes ), dropna ))
2153
+ return self ._constructor (codes , dtype = self .dtype , fastpath = True )
2163
2154
2164
2155
def unique (self ):
2165
2156
"""
@@ -2298,8 +2289,7 @@ def repeat(self, repeats, *args, **kwargs):
2298
2289
"""
2299
2290
nv .validate_repeat (args , kwargs )
2300
2291
codes = self ._codes .repeat (repeats )
2301
- return self ._constructor (values = codes , categories = self .categories ,
2302
- ordered = self .ordered , fastpath = True )
2292
+ return self ._constructor (values = codes , dtype = self .dtype , fastpath = True )
2303
2293
2304
2294
# Implement the ExtensionArray interface
2305
2295
@property
0 commit comments