Skip to content

Commit 19f7ff6

Browse files
committed
Add example/recipe
1 parent 3c77d94 commit 19f7ff6

File tree

1 file changed

+308
-48
lines changed

1 file changed

+308
-48
lines changed

pandas/core/accessors.py

Lines changed: 308 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,80 +1,198 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
3+
"""
34
4-
from pandas.core.base import PandasObject
5+
An example/recipe for creating a custom accessor.
56
67
7-
class PandasDelegate(PandasObject):
8-
""" an abstract base class for delegating methods/properties """
8+
The primary use case for accessors is when a Series contains instances
9+
of a particular class and we want to access properties/methods of these
10+
instances in Series form.
911
10-
def _delegate_property_get(self, name, *args, **kwargs):
11-
raise TypeError("You cannot access the "
12-
"property {name}".format(name=name))
12+
Suppose we have a custom State class representing US states:
1313
14-
def _delegate_property_set(self, name, value, *args, **kwargs):
15-
raise TypeError("The property {name} cannot be set".format(name=name))
14+
class State(object):
15+
def __repr__(self):
16+
return repr(self.name)
1617
17-
def _delegate_method(self, name, *args, **kwargs):
18-
raise TypeError("You cannot call method {name}".format(name=name))
18+
def __init__(self, name):
19+
self.name = name
20+
self._abbrev_dict = {'California': 'CA', 'Alabama': 'AL'}
21+
22+
@property
23+
def abbrev(self):
24+
return self._abbrev_dict[self.name]
25+
26+
@abbrev.setter
27+
def abbrev(self, value):
28+
self._abbrev_dict[self.name] = value
29+
30+
def fips(self):
31+
return {'California': 6, 'Alabama': 1}[self.name]
32+
33+
34+
We can construct a series of these objects:
35+
36+
>>> ser = pd.Series([State('Alabama'), State('California')])
37+
>>> ser
38+
0 'Alabama'
39+
1 'California'
40+
dtype: object
41+
42+
We would like direct access to the `abbrev` property and `fips` method.
43+
One option is to access these manually with `apply`:
44+
45+
>>> ser.apply(lambda x: x.fips())
46+
0 1
47+
1 6
48+
dtype: int64
49+
50+
But doing that repeatedly gets old in a hurry, so we decide to make a
51+
custom accessor. This entails subclassing `PandasDelegate` to specify
52+
what should be accessed and how.
53+
54+
There are four methods that *may* be defined in this subclass, one of which
55+
*must* be defined. The mandatory method is a classmethod called
56+
`_make_accessor`. `_make_accessor` is responsible for doing any validation on
57+
inputs for the accessor. In this case, the inputs must be a Series
58+
containing State objects.
59+
60+
61+
class StateDelegate(PandasDelegate):
62+
63+
def __init__(self, values):
64+
self.values = values
1965
2066
@classmethod
21-
def _add_delegate_accessors(cls, delegate, accessors, typ,
22-
overwrite=False):
23-
"""
24-
add accessors to cls from the delegate class
25-
26-
Parameters
27-
----------
28-
cls : the class to add the methods/properties to
29-
delegate : the class to get methods/properties & doc-strings
30-
acccessors : string list of accessors to add
31-
typ : 'property' or 'method'
32-
overwrite : boolean, default False
33-
overwrite the method/property in the target class if it exists
34-
"""
67+
def _make_accessor(cls, data):
68+
if not isinstance(data, pd.Series):
69+
raise ValueError('Input must be a Series of States')
70+
elif not data.apply(lambda x: isinstance(x, State)).all():
71+
raise ValueError('All entries must be State objects')
72+
return StateDelegate(data)
73+
74+
75+
With `_make_accessor` defined, we have enough to create the accessor, but
76+
not enough to actually do anything useful with it. In order to access
77+
*methods* of State objects, we implement `_delegate_method`. `_delegate_method`
78+
calls the underlying method for each object in the series and wraps these
79+
in a new Series. The simplest version looks like:
80+
81+
def _delegate_method(self, name, *args, **kwargs):
82+
state_method = lambda x: getattr(x, name)(*args, **kwargs)
83+
return self.values.apply(state_method)
84+
85+
Similarly in order to access *properties* of State objects, we need to
86+
implement `_delegate_property_get`:
87+
88+
def _delegate_property_get(self, name):
89+
state_property = lambda x: getattr(x, name)
90+
return self.values.apply(state_property)
91+
92+
93+
On occasion, we may want to be able to *set* the property being accessed.
94+
This is discouraged, but allowed (as long as the class being accessed
95+
allows the property to be set). Doing so requires implementing
96+
`_delegate_property_set`:
97+
98+
def _delegate_property_set(self, name, new_values):
99+
for (obj, val) in zip(self.values, new_values):
100+
setattr(obj, name, val)
101+
102+
103+
With these implemented, `StateDelegate` knows how to handle methods and
104+
properties. We just need to tell it what names and properties it is
105+
supposed to handle. This is done by decorating the `StateDelegate`
106+
class with `pd.accessors.wrap_delegate_names`. We apply the decorator
107+
once with a list of all the methods the accessor should recognize and
108+
once with a list of all the properties the accessor should recognize.
109+
110+
111+
@wrap_delegate_names(delegate=State,
112+
accessors=["fips"],
113+
typ="method")
114+
@wrap_delegate_names(delegate=State,
115+
accessors=["abbrev"],
116+
typ="property")
117+
class StateDelegate(PandasDelegate):
118+
[...]
119+
120+
121+
We can now pin the `state` accessor to the pd.Series class (we could
122+
alternatively pin it to the pd.Index class with a slightly different
123+
implementation above):
124+
125+
pd.Series.state = accessors.AccessorProperty(StateDelegate)
126+
127+
128+
>>> ser = pd.Series([State('Alabama'), State('California')])
129+
>>> isinstance(ser.state, StateDelegate)
130+
True
131+
132+
>>> ser.state.abbrev
133+
0 AL
134+
1 CA
135+
dtype: object
136+
137+
>>> ser.state.fips()
138+
0 1
139+
1 6
140+
141+
>>> ser.state.abbrev = ['Foo', 'Bar']
142+
>>> ser.state.abbrev
143+
0 Foo
144+
1 Bar
145+
dtype: object
146+
35147
36-
def _create_delegator_property(name):
37148
38-
def _getter(self):
39-
return self._delegate_property_get(name)
149+
"""
150+
from pandas.core.base import PandasObject
151+
from pandas.core import common as com
40152

41-
def _setter(self, new_values):
42-
return self._delegate_property_set(name, new_values)
153+
class PandasDelegate(PandasObject):
154+
""" an abstract base class for delegating methods/properties
43155
44-
_getter.__name__ = name
45-
_setter.__name__ = name
156+
Usage: To make a custom accessor, start by subclassing `PandasDelegate`.
157+
See example in the module-level docstring.
46158
47-
return property(fget=_getter, fset=_setter,
48-
doc=getattr(delegate, name).__doc__)
159+
"""
49160

50-
def _create_delegator_method(name):
161+
def __init__(self, values):
162+
self.values = values
163+
# #self._freeze()
51164

52-
def f(self, *args, **kwargs):
53-
return self._delegate_method(name, *args, **kwargs)
165+
@classmethod
166+
def _make_accessor(cls, data): # pragma: no cover
167+
raise NotImplementedError('It is up to subclasses to implement '
168+
'_make_accessor. This does input validation on the object to '
169+
'which the accessor is being pinned. '
170+
'It should return an instance of `cls`.')
54171

55-
f.__name__ = name
56-
f.__doc__ = getattr(delegate, name).__doc__
57172

58-
return f
173+
def _delegate_property_get(self, name, *args, **kwargs):
174+
raise TypeError("You cannot access the "
175+
"property {name}".format(name=name))
59176

60-
for name in accessors:
177+
def _delegate_property_set(self, name, value, *args, **kwargs):
178+
raise TypeError("The property {name} cannot be set".format(name=name))
61179

62-
if typ == 'property':
63-
f = _create_delegator_property(name)
64-
else:
65-
f = _create_delegator_method(name)
180+
def _delegate_method(self, name, *args, **kwargs):
181+
raise TypeError("You cannot call method {name}".format(name=name))
66182

67-
# don't overwrite existing methods/properties
68-
if overwrite or not hasattr(cls, name):
69-
setattr(cls, name, f)
70183

71184

72185
class AccessorProperty(object):
73186
"""Descriptor for implementing accessor properties like Series.str
74187
"""
75188

76-
def __init__(self, accessor_cls, construct_accessor):
189+
def __init__(self, accessor_cls, construct_accessor=None):
77190
self.accessor_cls = accessor_cls
191+
192+
if construct_accessor is None:
193+
# accessor_cls._make_accessor must be a classmethod
194+
construct_accessor = accessor_cls._make_accessor
195+
78196
self.construct_accessor = construct_accessor
79197
self.__doc__ = accessor_cls.__doc__
80198

@@ -89,3 +207,145 @@ def __set__(self, instance, value):
89207

90208
def __delete__(self, instance):
91209
raise AttributeError("can't delete attribute")
210+
211+
212+
class Delegator(object):
213+
""" Delegator class contains methods that are used by PandasDelegate
214+
and Accessor subclasses, but that do not ultimately belong in
215+
the namespaces of user-facing classes.
216+
217+
Many of these methods *could* be module-level functions, but are
218+
retained as staticmethods for organization purposes.
219+
"""
220+
221+
@staticmethod
222+
def create_delegator_property(name, delegate):
223+
# Note: we really only need the `delegate` here for the docstring
224+
225+
def _getter(self):
226+
return self._delegate_property_get(name)
227+
228+
def _setter(self, new_values):
229+
return self._delegate_property_set(name, new_values)
230+
# TODO: not hit in tests; not sure this is something we
231+
# really want anyway
232+
233+
_getter.__name__ = name
234+
_setter.__name__ = name
235+
_doc = getattr(delegate, name).__doc__
236+
return property(fget=_getter, fset=_setter, doc=_doc)
237+
238+
239+
@staticmethod
240+
def create_delegator_method(name, delegate):
241+
# Note: we really only need the `delegate` here for the docstring
242+
243+
def func(self, *args, **kwargs):
244+
return self._delegate_method(name, *args, **kwargs)
245+
246+
if callable(name):
247+
# A function/method was passed directly instead of a name
248+
# This may also render the `delegate` arg unnecessary.
249+
func.__name__ = name.__name__ # TODO: is this generally valid?
250+
func.__doc__ = name.__doc__
251+
else:
252+
func.__name__ = name
253+
func.__doc__ = getattr(delegate, name).__doc__
254+
return func
255+
256+
257+
@staticmethod
258+
def delegate_names(delegate, accessors, typ, overwrite=False):
259+
"""
260+
delegate_names decorates class definitions, e.g:
261+
262+
@delegate_names(Categorical, ["categories", "ordered"], "property")
263+
class CategoricalAccessor(PandasDelegate):
264+
265+
@classmethod
266+
def _make_accessor(cls, data):
267+
[...]
268+
269+
270+
This replaces the older usage in which following a class definition
271+
we would use `Foo._add_delegate_accessors(...)`. The motivation
272+
is that we would like to keep as much of a class's internals inside
273+
the class definition. For things that we cannot keep directly
274+
in the class definition, a decorator is more directly tied to
275+
the definition than a method call outside the definition.
276+
277+
"""
278+
# Note: we really only need the `delegate` here for the docstring
279+
280+
def add_delegate_accessors(cls):
281+
"""
282+
add accessors to cls from the delegate class
283+
284+
Parameters
285+
----------
286+
cls : the class to add the methods/properties to
287+
delegate : the class to get methods/properties & doc-strings
288+
accessors : string list of accessors to add
289+
typ : 'property' or 'method'
290+
overwrite : boolean, default False
291+
overwrite the method/property in the target class if it exists
292+
"""
293+
for name in accessors:
294+
if typ == "property":
295+
func = Delegator.create_delegator_property(name, delegate)
296+
else:
297+
func = Delegator.create_delegator_method(name, delegate)
298+
299+
# Allow for a callable to be passed instead of a name.
300+
title = com._get_callable_name(name)
301+
title = title or name
302+
# don't overwrite existing methods/properties unless
303+
# specifically told to do so
304+
if overwrite or not hasattr(cls, title):
305+
setattr(cls, title, func)
306+
307+
return cls
308+
309+
return add_delegate_accessors
310+
311+
312+
313+
wrap_delegate_names = Delegator.delegate_names
314+
# TODO: the `delegate` arg to `wrap_delegate_names` is really only relevant
315+
# for a docstring. It'd be nice if we didn't require it and could duck-type
316+
# instead.
317+
318+
# TODO: There are 2-3 implementations of `_delegate_method`
319+
# and `_delegate_property` that are common enough that we should consider
320+
# making them the defaults. First, if the series being accessed has `name`
321+
# method/property:
322+
#
323+
# def _delegate_method(self, name, *args, **kwargs):
324+
# result = getattr(self.values, name)(*args, **kwargs)
325+
# return result
326+
#
327+
# def _delegate_property_get(self, name):
328+
# result = getattr(self.values, name)
329+
# return result
330+
#
331+
#
332+
# Alternately if the series being accessed does not have this attribute,
333+
# but is a series of objects that do have the attribute:
334+
#
335+
# def _delegate_method(self, name, *args, **kwargs):
336+
# meth = lambda x: getattr(x, name)(*args, **kwargs)
337+
# return self.values.apply(meth)
338+
#
339+
# def _delegate_property_get(self, name):
340+
# prop = lambda x: getattr(x, name)
341+
# return self.values.apply(prop)
342+
#
343+
#
344+
# `apply` would need to be changed to `map` if self.values is an Index.
345+
#
346+
# The third thing to consider moving into the general case is
347+
# core.strings.StringMethods._wrap_result, which handles a bunch of cases
348+
# for how to wrap delegated outputs.
349+
350+
351+

0 commit comments

Comments
 (0)