1
1
#!/usr/bin/env python
2
2
# -*- coding: utf-8 -*-
3
+ """
3
4
4
- from pandas . core . base import PandasObject
5
+ An example/recipe for creating a custom accessor.
5
6
6
7
7
- class PandasDelegate (PandasObject ):
8
- """ an abstract base class for delegating methods/properties """
8
+ The primary use case for accessors is when a Series contains instances
9
+ of a particular class and we want to access properties/methods of these
10
+ instances in Series form.
9
11
10
- def _delegate_property_get (self , name , * args , ** kwargs ):
11
- raise TypeError ("You cannot access the "
12
- "property {name}" .format (name = name ))
12
+ Suppose we have a custom State class representing US states:
13
13
14
- def _delegate_property_set (self , name , value , * args , ** kwargs ):
15
- raise TypeError ("The property {name} cannot be set" .format (name = name ))
14
+ class State(object):
15
+ def __repr__(self):
16
+ return repr(self.name)
16
17
17
- def _delegate_method (self , name , * args , ** kwargs ):
18
- raise TypeError ("You cannot call method {name}" .format (name = name ))
18
+ def __init__(self, name):
19
+ self.name = name
20
+ self._abbrev_dict = {'California': 'CA', 'Alabama': 'AL'}
21
+
22
+ @property
23
+ def abbrev(self):
24
+ return self._abbrev_dict[self.name]
25
+
26
+ @abbrev.setter
27
+ def abbrev(self, value):
28
+ self._abbrev_dict[self.name] = value
29
+
30
+ def fips(self):
31
+ return {'California': 6, 'Alabama': 1}[self.name]
32
+
33
+
34
+ We can construct a series of these objects:
35
+
36
+ >>> ser = pd.Series([State('Alabama'), State('California')])
37
+ >>> ser
38
+ 0 'Alabama'
39
+ 1 'California'
40
+ dtype: object
41
+
42
+ We would like direct access to the `abbrev` property and `fips` method.
43
+ One option is to access these manually with `apply`:
44
+
45
+ >>> ser.apply(lambda x: x.fips())
46
+ 0 1
47
+ 1 6
48
+ dtype: int64
49
+
50
+ But doing that repeatedly gets old in a hurry, so we decide to make a
51
+ custom accessor. This entails subclassing `PandasDelegate` to specify
52
+ what should be accessed and how.
53
+
54
+ There are four methods that *may* be defined in this subclass, one of which
55
+ *must* be defined. The mandatory method is a classmethod called
56
+ `_make_accessor`. `_make_accessor` is responsible for doing any validation on
57
+ inputs for the accessor. In this case, the inputs must be a Series
58
+ containing State objects.
59
+
60
+
61
+ class StateDelegate(PandasDelegate):
62
+
63
+ def __init__(self, values):
64
+ self.values = values
19
65
20
66
@classmethod
21
- def _add_delegate_accessors (cls , delegate , accessors , typ ,
22
- overwrite = False ):
23
- """
24
- add accessors to cls from the delegate class
25
-
26
- Parameters
27
- ----------
28
- cls : the class to add the methods/properties to
29
- delegate : the class to get methods/properties & doc-strings
30
- acccessors : string list of accessors to add
31
- typ : 'property' or 'method'
32
- overwrite : boolean, default False
33
- overwrite the method/property in the target class if it exists
34
- """
67
+ def _make_accessor(cls, data):
68
+ if not isinstance(data, pd.Series):
69
+ raise ValueError('Input must be a Series of States')
70
+ elif not data.apply(lambda x: isinstance(x, State)).all():
71
+ raise ValueError('All entries must be State objects')
72
+ return StateDelegate(data)
73
+
74
+
75
+ With `_make_accessor` defined, we have enough to create the accessor, but
76
+ not enough to actually do anything useful with it. In order to access
77
+ *methods* of State objects, we implement `_delegate_method`. `_delegate_method`
78
+ calls the underlying method for each object in the series and wraps these
79
+ in a new Series. The simplest version looks like:
80
+
81
+ def _delegate_method(self, name, *args, **kwargs):
82
+ state_method = lambda x: getattr(x, name)(*args, **kwargs)
83
+ return self.values.apply(state_method)
84
+
85
+ Similarly in order to access *properties* of State objects, we need to
86
+ implement `_delegate_property_get`:
87
+
88
+ def _delegate_property_get(self, name):
89
+ state_property = lambda x: getattr(x, name)
90
+ return self.values.apply(state_property)
91
+
92
+
93
+ On occasion, we may want to be able to *set* the property being accessed.
94
+ This is discouraged, but allowed (as long as the class being accessed
95
+ allows the property to be set). Doing so requires implementing
96
+ `_delegate_property_set`:
97
+
98
+ def _delegate_property_set(self, name, new_values):
99
+ for (obj, val) in zip(self.values, new_values):
100
+ setattr(obj, name, val)
101
+
102
+
103
+ With these implemented, `StateDelegate` knows how to handle methods and
104
+ properties. We just need to tell it which method and property names it is
105
+ supposed to handle. This is done by decorating the `StateDelegate`
106
+ class with `pd.accessors.wrap_delegate_names`. We apply the decorator
107
+ once with a list of all the methods the accessor should recognize and
108
+ once with a list of all the properties the accessor should recognize.
109
+
110
+
111
+ @wrap_delegate_names(delegate=State,
112
+ accessors=["fips"],
113
+ typ="method")
114
+ @wrap_delegate_names(delegate=State,
115
+ accessors=["abbrev"],
116
+ typ="property")
117
+ class StateDelegate(PandasDelegate):
118
+ [...]
119
+
120
+
121
+ We can now pin the `state` accessor to the pd.Series class (we could
122
+ alternatively pin it to the pd.Index class with a slightly different
123
+ implementation above):
124
+
125
+ pd.Series.state = accessors.AccessorProperty(StateDelegate)
126
+
127
+
128
+ >>> ser = pd.Series([State('Alabama'), State('California')])
129
+ >>> isinstance(ser.state, StateDelegate)
130
+ True
131
+
132
+ >>> ser.state.abbrev
133
+ 0 AL
134
+ 1 CA
135
+ dtype: object
136
+
137
+ >>> ser.state.fips()
138
+ 0 1
139
+ 1 6
140
+
141
+ >>> ser.state.abbrev = ['Foo', 'Bar']
142
+ >>> ser.state.abbrev
143
+ 0 Foo
144
+ 1 Bar
145
+ dtype: object
146
+
35
147
36
- def _create_delegator_property (name ):
37
148
38
- def _getter (self ):
39
- return self ._delegate_property_get (name )
149
+ """
150
+ from pandas .core .base import PandasObject
151
+ from pandas .core import common as com
40
152
41
- def _setter ( self , new_values ):
42
- return self . _delegate_property_set ( name , new_values )
153
+ class PandasDelegate ( PandasObject ):
154
+ """ an abstract base class for delegating methods/properties
43
155
44
- _getter . __name__ = name
45
- _setter . __name__ = name
156
+ Usage: To make a custom accessor, start by subclassing `PandasDelegate`.
157
+ See example in the module-level docstring.
46
158
47
- return property (fget = _getter , fset = _setter ,
48
- doc = getattr (delegate , name ).__doc__ )
159
+ """
49
160
50
- def _create_delegator_method (name ):
161
+ def __init__ (self , values ):
162
+ self .values = values
163
+ # #self._freeze()
51
164
52
- def f (self , * args , ** kwargs ):
53
- return self ._delegate_method (name , * args , ** kwargs )
165
+ @classmethod
166
+ def _make_accessor (cls , data ): # pragma: no cover
167
+ raise NotImplementedError ('It is up to subclasses to implement '
168
+ '_make_accessor. This does input validation on the object to '
169
+ 'which the accessor is being pinned. '
170
+ 'It should return an instance of `cls`.' )
54
171
55
- f .__name__ = name
56
- f .__doc__ = getattr (delegate , name ).__doc__
57
172
58
- return f
173
+ def _delegate_property_get (self , name , * args , ** kwargs ):
174
+ raise TypeError ("You cannot access the "
175
+ "property {name}" .format (name = name ))
59
176
60
- for name in accessors :
177
+ def _delegate_property_set (self , name , value , * args , ** kwargs ):
178
+ raise TypeError ("The property {name} cannot be set" .format (name = name ))
61
179
62
- if typ == 'property' :
63
- f = _create_delegator_property (name )
64
- else :
65
- f = _create_delegator_method (name )
180
+ def _delegate_method (self , name , * args , ** kwargs ):
181
+ raise TypeError ("You cannot call method {name}" .format (name = name ))
66
182
67
- # don't overwrite existing methods/properties
68
- if overwrite or not hasattr (cls , name ):
69
- setattr (cls , name , f )
70
183
71
184
72
185
class AccessorProperty (object ):
73
186
"""Descriptor for implementing accessor properties like Series.str
74
187
"""
75
188
76
- def __init__ (self , accessor_cls , construct_accessor ):
189
+ def __init__ (self , accessor_cls , construct_accessor = None ):
77
190
self .accessor_cls = accessor_cls
191
+
192
+ if construct_accessor is None :
193
+ # accessor_cls._make_accessor must be a classmethod
194
+ construct_accessor = accessor_cls ._make_accessor
195
+
78
196
self .construct_accessor = construct_accessor
79
197
self .__doc__ = accessor_cls .__doc__
80
198
@@ -89,3 +207,145 @@ def __set__(self, instance, value):
89
207
90
208
def __delete__ (self , instance ):
91
209
raise AttributeError ("can't delete attribute" )
210
+
211
+
212
+ class Delegator (object ):
213
+ """ Delegator class contains methods that are used by PandasDelegate
214
+ and Accessor subclasses, but that do not ultimately belong in
215
+ the namespaces of user-facing classes.
216
+
217
+ Many of these methods *could* be module-level functions, but are
218
+ retained as staticmethods for organization purposes.
219
+ """
220
+
221
+ @staticmethod
222
+ def create_delegator_property (name , delegate ):
223
+ # Note: we really only need the `delegate` here for the docstring
224
+
225
+ def _getter (self ):
226
+ return self ._delegate_property_get (name )
227
+
228
+ def _setter (self , new_values ):
229
+ return self ._delegate_property_set (name , new_values )
230
+ # TODO: not hit in tests; not sure this is something we
231
+ # really want anyway
232
+
233
+ _getter .__name__ = name
234
+ _setter .__name__ = name
235
+ _doc = getattr (delegate , name ).__doc__
236
+ return property (fget = _getter , fset = _setter , doc = _doc )
237
+
238
+
239
+ @staticmethod
240
+ def create_delegator_method (name , delegate ):
241
+ # Note: we really only need the `delegate` here for the docstring
242
+
243
+ def func (self , * args , ** kwargs ):
244
+ return self ._delegate_method (name , * args , ** kwargs )
245
+
246
+ if callable (name ):
247
+ # A function/method was passed directly instead of a name
248
+ # This may also render the `delegate` arg unnecessary.
249
+ func .__name__ = name .__name__ # TODO: is this generally valid?
250
+ func .__doc__ = name .__doc__
251
+ else :
252
+ func .__name__ = name
253
+ func .__doc__ = getattr (delegate , name ).__doc__
254
+ return func
255
+
256
+
257
+ @staticmethod
258
+ def delegate_names (delegate , accessors , typ , overwrite = False ):
259
+ """
260
+ delegate_names decorates class definitions, e.g:
261
+
262
+ @delegate_names(Categorical, ["categories", "ordered"], "property")
263
+ class CategoricalAccessor(PandasDelegate):
264
+
265
+ @classmethod
266
+ def _make_accessor(cls, data):
267
+ [...]
268
+
269
+
270
+ This replaces the older usage in which following a class definition
271
+ we would use `Foo._add_delegate_accessors(...)`. The motivation
272
+ is that we would like to keep as much of a class's internals inside
273
+ the class definition. For things that we cannot keep directly
274
+ in the class definition, a decorator is more directly tied to
275
+ the definition than a method call outside the definition.
276
+
277
+ """
278
+ # Note: we really only need the `delegate` here for the docstring
279
+
280
+ def add_delegate_accessors (cls ):
281
+ """
282
+ add accessors to cls from the delegate class
283
+
284
+ Parameters
285
+ ----------
286
+ cls : the class to add the methods/properties to
287
+ delegate : the class to get methods/properties & doc-strings
288
+ accessors : string list of accessors to add
289
+ typ : 'property' or 'method'
290
+ overwrite : boolean, default False
291
+ overwrite the method/property in the target class if it exists
292
+ """
293
+ for name in accessors :
294
+ if typ == "property" :
295
+ func = Delegator .create_delegator_property (name , delegate )
296
+ else :
297
+ func = Delegator .create_delegator_method (name , delegate )
298
+
299
+ # Allow for a callable to be passed instead of a name.
300
+ title = com ._get_callable_name (name )
301
+ title = title or name
302
+ # don't overwrite existing methods/properties unless
303
+ # specifically told to do so
304
+ if overwrite or not hasattr (cls , title ):
305
+ setattr (cls , title , func )
306
+
307
+ return cls
308
+
309
+ return add_delegate_accessors
310
+
311
+
312
+
313
+ wrap_delegate_names = Delegator .delegate_names
314
+ # TODO: the `delegate` arg to `wrap_delegate_names` is really only relevant
315
+ # for a docstring. It'd be nice if we didn't require it and could duck-type
316
+ # instead.
317
+
318
+ # TODO: There are 2-3 implementations of `_delegate_method`
319
+ # and `_delegate_property_get` that are common enough that we should consider
320
+ # making them the defaults. First, if the series being accessed has `name`
321
+ # method/property:
322
+ #
323
+ # def _delegate_method(self, name, *args, **kwargs):
324
+ # result = getattr(self.values, name)(*args, **kwargs)
325
+ # return result
326
+ #
327
+ # def _delegate_property_get(self, name):
328
+ # result = getattr(self.values, name)
329
+ # return result
330
+ #
331
+ #
332
+ # Alternately if the series being accessed does not have this attribute,
333
+ # but is a series of objects that do have the attribute:
334
+ #
335
+ # def _delegate_method(self, name, *args, **kwargs):
336
+ # meth = lambda x: getattr(x, name)(*args, **kwargs)
337
+ # return self.values.apply(meth)
338
+ #
339
+ # def _delegate_property_get(self, name):
340
+ # prop = lambda x: getattr(x, name)
341
+ # return self.values.apply(prop)
342
+ #
343
+ #
344
+ # `apply` would need to be changed to `map` if self.values is an Index.
345
+ #
346
+ # The third thing to consider moving into the general case is
347
+ # core.strings.StringMethods._wrap_result, which handles a bunch of cases
348
+ # for how to wrap delegated outputs.
349
+
350
+
351
+
0 commit comments