Skip to content

Commit 1115ce0

Browse files
committed
CLN: use self.dtype rather than self.categories/ordered in Categorical (pandas-dev#22513)
1 parent 6b83df9 commit 1115ce0

File tree

2 files changed: +29 additions, -41 deletions

pandas/core/arrays/categorical.py

Lines changed: 28 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -499,10 +499,8 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
499499

500500
def copy(self):
501501
""" Copy constructor. """
502-
return self._constructor(values=self._codes.copy(),
503-
categories=self.categories,
504-
ordered=self.ordered,
505-
fastpath=True)
502+
codes = self._codes.copy()
503+
return self._constructor(values=codes, dtype=self.dtype, fastpath=True)
506504

507505
def astype(self, dtype, copy=True):
508506
"""
@@ -635,6 +633,7 @@ def from_codes(cls, codes, categories, ordered=False):
635633
categorical. If not given, the resulting categorical will be
636634
unordered.
637635
"""
636+
dtype = CategoricalDtype(categories, ordered=ordered)
638637
codes = np.asarray(codes) # #21767
639638
if not is_integer_dtype(codes):
640639
msg = "codes need to be array-like integers"
@@ -649,19 +648,17 @@ def from_codes(cls, codes, categories, ordered=False):
649648
raise ValueError(msg)
650649

651650
try:
652-
codes = coerce_indexer_dtype(codes, categories)
651+
codes = coerce_indexer_dtype(codes, dtype.categories)
653652
except (ValueError, TypeError):
654653
raise ValueError(
655654
"codes need to be convertible to an arrays of integers")
656655

657-
categories = CategoricalDtype.validate_categories(categories)
658-
659-
if len(codes) and (codes.max() >= len(categories) or codes.min() < -1):
656+
if len(codes) and (codes.max() >= len(dtype.categories) or
657+
codes.min() < -1):
660658
raise ValueError("codes need to be between -1 and "
661659
"len(categories)-1")
662660

663-
return cls(codes, categories=categories, ordered=ordered,
664-
fastpath=True)
661+
return cls(codes, dtype=dtype, fastpath=True)
665662

666663
_codes = None
667664

@@ -1632,8 +1629,8 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'):
16321629
self._codes = codes
16331630
return
16341631
else:
1635-
return self._constructor(values=codes, categories=self.categories,
1636-
ordered=self.ordered, fastpath=True)
1632+
return self._constructor(values=codes, dtype=self.dtype,
1633+
fastpath=True)
16371634

16381635
def _values_for_rank(self):
16391636
"""
@@ -1737,15 +1734,15 @@ def fillna(self, value=None, method=None, limit=None):
17371734
raise NotImplementedError("specifying a limit for fillna has not "
17381735
"been implemented yet")
17391736

1740-
values = self._codes
1737+
codes = self._codes
17411738

17421739
# pad / bfill
17431740
if method is not None:
17441741

1745-
values = self.to_dense().reshape(-1, len(self))
1746-
values = interpolate_2d(values, method, 0, None,
1747-
value).astype(self.categories.dtype)[0]
1748-
values = _get_codes_for_values(values, self.categories)
1742+
arr = self.to_dense().reshape(-1, len(self))
1743+
arr = interpolate_2d(arr, method, 0, None,
1744+
value).astype(self.categories.dtype)[0]
1745+
codes = _get_codes_for_values(arr, self.categories)
17491746

17501747
else:
17511748

@@ -1757,28 +1754,26 @@ def fillna(self, value=None, method=None, limit=None):
17571754

17581755
values_codes = _get_codes_for_values(value, self.categories)
17591756
indexer = np.where(values_codes != -1)
1760-
values[indexer] = values_codes[values_codes != -1]
1757+
codes[indexer] = values_codes[values_codes != -1]
17611758

17621759
# If value is not a dict or Series it should be a scalar
17631760
elif is_hashable(value):
17641761
if not isna(value) and value not in self.categories:
17651762
raise ValueError("fill value must be in categories")
17661763

1767-
mask = values == -1
1764+
mask = codes == -1
17681765
if mask.any():
1769-
values = values.copy()
1766+
codes = codes.copy()
17701767
if isna(value):
1771-
values[mask] = -1
1768+
codes[mask] = -1
17721769
else:
1773-
values[mask] = self.categories.get_loc(value)
1770+
codes[mask] = self.categories.get_loc(value)
17741771

17751772
else:
17761773
raise TypeError('"value" parameter must be a scalar, dict '
17771774
'or Series, but you passed a '
17781775
'"{0}"'.format(type(value).__name__))
1779-
1780-
return self._constructor(values, categories=self.categories,
1781-
ordered=self.ordered, fastpath=True)
1776+
return self._constructor(codes, dtype=self.dtype, fastpath=True)
17821777

17831778
def take_nd(self, indexer, allow_fill=None, fill_value=None):
17841779
"""
@@ -1823,8 +1818,7 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None):
18231818

18241819
codes = take(self._codes, indexer, allow_fill=allow_fill,
18251820
fill_value=fill_value)
1826-
result = self._constructor(codes, categories=self.categories,
1827-
ordered=self.ordered, fastpath=True)
1821+
result = self._constructor(codes, dtype=self.dtype, fastpath=True)
18281822
return result
18291823

18301824
take = take_nd
@@ -1843,9 +1837,8 @@ def _slice(self, slicer):
18431837
"categorical")
18441838
slicer = slicer[1]
18451839

1846-
_codes = self._codes[slicer]
1847-
return self._constructor(values=_codes, categories=self.categories,
1848-
ordered=self.ordered, fastpath=True)
1840+
codes = self._codes[slicer]
1841+
return self._constructor(values=codes, dtype=self.dtype, fastpath=True)
18491842

18501843
def __len__(self):
18511844
"""The length of this Categorical."""
@@ -2152,14 +2145,12 @@ def mode(self, dropna=True):
21522145
"""
21532146

21542147
import pandas._libs.hashtable as htable
2155-
values = self._codes
2148+
codes = self._codes
21562149
if dropna:
21572150
good = self._codes != -1
2158-
values = self._codes[good]
2159-
values = sorted(htable.mode_int64(ensure_int64(values), dropna))
2160-
result = self._constructor(values=values, categories=self.categories,
2161-
ordered=self.ordered, fastpath=True)
2162-
return result
2151+
codes = self._codes[good]
2152+
codes = sorted(htable.mode_int64(ensure_int64(codes), dropna))
2153+
return self._constructor(codes, dtype=self.dtype, fastpath=True)
21632154

21642155
def unique(self):
21652156
"""
@@ -2298,8 +2289,7 @@ def repeat(self, repeats, *args, **kwargs):
22982289
"""
22992290
nv.validate_repeat(args, kwargs)
23002291
codes = self._codes.repeat(repeats)
2301-
return self._constructor(values=codes, categories=self.categories,
2302-
ordered=self.ordered, fastpath=True)
2292+
return self._constructor(values=codes, dtype=self.dtype, fastpath=True)
23032293

23042294
# Implement the ExtensionArray interface
23052295
@property

pandas/core/indexes/category.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -460,9 +460,7 @@ def where(self, cond, other=None):
460460
other = self._na_value
461461
values = np.where(cond, self.values, other)
462462

463-
cat = Categorical(values,
464-
categories=self.categories,
465-
ordered=self.ordered)
463+
cat = Categorical(values, dtype=self.dtype)
466464
return self._shallow_copy(cat, **self._get_attributes_dict())
467465

468466
def reindex(self, target, method=None, level=None, limit=None,

0 commit comments

Comments
 (0)