Skip to content

Commit 3c77d94

Browse files
committed
Implement _make_accessor as classmethod on StringMethods
Expand some inline if/else blocks
1 parent dbc149d commit 3c77d94

File tree

1 file changed

+39
-21
lines changed

1 file changed

+39
-21
lines changed

pandas/core/strings.py

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
from pandas.core.algorithms import take_1d
1818
import pandas.compat as compat
19+
from pandas.core import accessors
20+
1921
from pandas.core.accessors import AccessorProperty
2022
from pandas.core.base import NoNewAttributesMixin
2123
from pandas.util._decorators import Appender
@@ -1437,7 +1439,11 @@ class StringMethods(NoNewAttributesMixin):
14371439

14381440
def __init__(self, data):
14391441
self._is_categorical = is_categorical_dtype(data)
1440-
self._data = data.cat.categories if self._is_categorical else data
1442+
if self._is_categorical:
1443+
self._data = data.cat.categories
1444+
else:
1445+
self._data = data
1446+
14411447
# save orig to blow up categoricals to the right type
14421448
self._orig = data
14431449
self._freeze()
@@ -1456,8 +1462,7 @@ def __iter__(self):
14561462
i += 1
14571463
g = self.get(i)
14581464

1459-
def _wrap_result(self, result, use_codes=True,
1460-
name=None, expand=None):
1465+
def _wrap_result(self, result, use_codes=True, name=None, expand=None):
14611466

14621467
from pandas.core.index import Index, MultiIndex
14631468

@@ -1475,7 +1480,7 @@ def _wrap_result(self, result, use_codes=True,
14751480

14761481
if expand is None:
14771482
# infer from ndim if expand is not specified
1478-
expand = False if result.ndim == 1 else True
1483+
expand = result.ndim != 1
14791484

14801485
elif expand is True and not isinstance(self._orig, Index):
14811486
# required when expand=True is explicitly specified
@@ -1527,7 +1532,10 @@ def cons_row(x):
15271532

15281533
@copy_doc(str_cat)
15291534
def cat(self, others=None, sep=None, na_rep=None):
1530-
data = self._orig if self._is_categorical else self._data
1535+
if self._is_categorical:
1536+
data = self._orig
1537+
else:
1538+
data = self._data
15311539
result = str_cat(data, others=others, sep=sep, na_rep=na_rep)
15321540
return self._wrap_result(result, use_codes=(not self._is_categorical))
15331541

@@ -1739,7 +1747,10 @@ def wrap(self, width, **kwargs):
17391747
def get_dummies(self, sep='|'):
17401748
# we need to cast to Series of strings as only that has all
17411749
# methods available for making the dummies...
1742-
data = self._orig.astype(str) if self._is_categorical else self._data
1750+
if self._is_categorical:
1751+
data = self._orig.astype(str)
1752+
else:
1753+
data = self._data
17431754
result, name = str_get_dummies(data, sep)
17441755
return self._wrap_result(result, use_codes=(not self._is_categorical),
17451756
name=name, expand=True)
@@ -1900,18 +1911,14 @@ def rindex(self, sub, start=0, end=None):
19001911
docstring=_shared_docs['ismethods'] %
19011912
_shared_docs['isdecimal'])
19021913

1903-
1904-
class StringAccessorMixin(object):
1905-
""" Mixin to add a `.str` accessor to the class."""
1906-
1907-
# string methods
1908-
def _make_str_accessor(self):
1914+
@classmethod
1915+
def _make_accessor(cls, data):
19091916
from pandas.core.index import Index
19101917

1911-
if (isinstance(self, ABCSeries) and
1912-
not ((is_categorical_dtype(self.dtype) and
1913-
is_object_dtype(self.values.categories)) or
1914-
(is_object_dtype(self.dtype)))):
1918+
if (isinstance(data, ABCSeries) and
1919+
not ((is_categorical_dtype(data.dtype) and
1920+
is_object_dtype(data.values.categories)) or
1921+
(is_object_dtype(data.dtype)))):
19151922
# it's neither a string series nor a categorical series with
19161923
# strings inside the categories.
19171924
# this really should exclude all series with any non-string values
@@ -1920,23 +1927,34 @@ def _make_str_accessor(self):
19201927
raise AttributeError("Can only use .str accessor with string "
19211928
"values, which use np.object_ dtype in "
19221929
"pandas")
1923-
elif isinstance(self, Index):
1930+
elif isinstance(data, Index):
19241931
# can't use ABCIndex to exclude non-str
19251932

19261933
# see src/inference.pyx which can contain string values
19271934
allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
1928-
if self.inferred_type not in allowed_types:
1935+
if data.inferred_type not in allowed_types:
19291936
message = ("Can only use .str accessor with string values "
19301937
"(i.e. inferred_type is 'string', 'unicode' or "
19311938
"'mixed')")
19321939
raise AttributeError(message)
1933-
if self.nlevels > 1:
1940+
if data.nlevels > 1:
19341941
message = ("Can only use .str accessor with Index, not "
19351942
"MultiIndex")
19361943
raise AttributeError(message)
1937-
return StringMethods(self)
1944+
return StringAccessor(data)
1945+
1946+
StringAccessor = StringMethods # Alias to mirror CategoricalAccessor
1947+
1948+
1949+
# TODO: This is only mixed in to Index (this PR takes it out of Series)
1950+
# and the _dir_additions/_dir_deletions won't play nicely with
1951+
# any other class this gets mixed into that *does* implement its own
1952+
# _dir_additions/_dir_deletions. This should be deprecated.
1953+
class StringAccessorMixin(object):
1954+
""" Mixin to add a `.str` accessor to the class."""
1955+
19381956

1939-
str = AccessorProperty(StringMethods, _make_str_accessor)
1957+
str = accessors.AccessorProperty(StringAccessor)
19401958

19411959
def _dir_additions(self):
19421960
return set()

0 commit comments

Comments
 (0)