Skip to content

Commit 764e8c4

Browse files
committed
Refactor map to use common code for series and index when possible and add dict performance test
1 parent b95b960 commit 764e8c4

File tree

5 files changed

+107
-58
lines changed

5 files changed

+107
-58
lines changed

asv_bench/benchmarks/series_methods.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,30 @@ def time_series_dropna_datetime(self):
123123
self.s.dropna()
124124

125125

126+
class series_map_dict(object):
    """Benchmark ``Series.map`` when the mapper is a plain ``dict``.

    ``setup`` builds a 10000-element integer Series whose values all fall
    in ``[0, map_size)`` and a dict covering every one of those keys, so
    each lookup performed by the timed call is a hit.
    """

    # Same target as the other benchmark classes in this file.
    goal_time = 0.2

    def setup(self):
        map_size = 1000
        # A fixed-seed local generator keeps the benchmark input identical
        # from run to run without touching the global numpy RNG state.
        rng = np.random.RandomState(1234)
        self.s = Series(rng.randint(0, map_size, 10000))
        self.map_dict = {i: map_size - i for i in range(map_size)}

    def time_series_map_dict(self):
        self.s.map(self.map_dict)
136+
137+
138+
class series_map_series(object):
    """Benchmark ``Series.map`` when the mapper is another ``Series``.

    ``setup`` builds a 10000-element integer Series whose values all fall
    in ``[0, map_size)`` and a mapper Series with a default integer index
    of length ``map_size``, so every value has a matching index label.
    """

    # Same target as the other benchmark classes in this file.
    goal_time = 0.2

    def setup(self):
        map_size = 1000
        # A fixed-seed local generator keeps the benchmark input identical
        # from run to run without touching the global numpy RNG state.
        rng = np.random.RandomState(1234)
        self.s = Series(rng.randint(0, map_size, 10000))
        self.map_series = Series(map_size - np.arange(map_size))

    def time_series_map_series(self):
        self.s.map(self.map_series)
148+
149+
126150
class series_clip(object):
127151
goal_time = 0.2
128152

pandas/core/base.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from pandas.util._validators import validate_bool_kwarg
1818

19-
from pandas.core import common as com
19+
from pandas.core import common as com, algorithms
2020
import pandas.core.nanops as nanops
2121
import pandas._libs.lib as lib
2222
from pandas.compat.numpy import function as nv
@@ -838,6 +838,51 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
838838
klass=self.__class__.__name__, op=name))
839839
return func(**kwds)
840840

841+
def _map_values(self, values, arg, na_action=None):
    """Map ``values`` through ``arg`` and return the mapped values.

    Common implementation called by both ``Series.map`` and ``Index.map``.

    Parameters
    ----------
    values : array-like
        The values to be mapped, as supplied by the calling ``map``.
    arg : function, dict, or Series
        The correspondence to apply. A dict without ``__missing__`` and a
        Series are both resolved through an index lookup; a dict that
        defines ``__missing__`` is wrapped in a lookup function; anything
        else is used as the mapping function.
    na_action : {None, 'ignore'}
        If 'ignore', NA values are masked out instead of being passed to
        the mapping function. Only affects the function code path.

    Returns
    -------
    new_values
        The mapped values: the output of ``algorithms.take_1d`` for
        dict/Series input, or of the element-wise mapper otherwise.

    Raises
    ------
    NotImplementedError
        If ``self.dtype`` is an extension type and ``na_action`` is not
        None.
    """
    # Pick the element-wise mapper up front; it is only used when ``arg``
    # ends up being a function.
    if is_extension_type(self.dtype):
        if na_action is not None:
            raise NotImplementedError
        map_f = lambda values, f: values.map(f)
    else:
        if na_action == 'ignore':
            def map_f(values, f):
                return lib.map_infer_mask(values, f,
                                          isnull(values).view(np.uint8))
        else:
            map_f = lib.map_infer

    map_values = None
    if isinstance(arg, dict):
        if hasattr(arg, '__missing__'):
            # If a dictionary subclass defines a default value method,
            # convert arg to a lookup function (GH #15999).
            dict_with_default = arg
            arg = lambda x: dict_with_default[x]
        else:
            # Dictionary does not have a default. Thus it's safe to
            # convert to an Index for efficiency.
            from pandas import Index
            idx = Index(arg.keys())
            # Cast to dict so we can get values using lib.fast_multiget
            # if this is a dict subclass (GH #15999)
            map_values = idx._get_values_from_dict(dict(arg))
            arg = idx
    elif isinstance(arg, ABCSeries):
        # Read ``_values`` (not ``.values``), as Series.map did before
        # this logic was shared.
        # NOTE(review): ``.values`` may coerce extension-typed data
        # (e.g. tz-aware datetimes) -- confirm.
        map_values = arg._values
        arg = arg.index

    if map_values is None:
        # arg is a function
        return map_f(values, arg)

    # map_values was filled in, so we came from either a dict or a Series
    # and arg is now the Index to look the input values up in.
    indexer = arg.get_indexer(values)
    return algorithms.take_1d(map_values, indexer)
884+
885+
841886
def value_counts(self, normalize=False, sort=True, ascending=False,
842887
bins=None, dropna=True):
843888
"""

pandas/core/indexes/base.py

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2827,7 +2827,7 @@ def get_indexer_for(self, target, **kwargs):
28272827
28282828
Parameters
28292829
----------
2830-
data : dict
2830+
data : {dict, DictWithoutMissing}
28312831
The dictionary from which to extract the values
28322832
28332833
Returns
@@ -2879,43 +2879,36 @@ def groupby(self, values):
28792879

28802880
return result
28812881

2882-
def map(self, mapper):
2883-
"""Apply mapper function to an index.
2882+
def map(self, arg, na_action=None):
2883+
"""Map values of Index using input correspondence (which can be a
2884+
dict, Series, or function)
28842885
28852886
Parameters
28862887
----------
2887-
mapper : {callable, dict, Series}
2888-
Function to be applied or input correspondence object.
2889-
dict and Series support new in 0.20.0.
2888+
arg : function, dict, or Series
2889+
na_action : {None, 'ignore'}
2890+
If 'ignore', propagate NA values, without passing them to the
2891+
mapping function
28902892
28912893
Returns
28922894
-------
2893-
applied : Union[Index, MultiIndex], inferred
2895+
applied : {Index, MultiIndex}, inferred
28942896
The output of the mapping function applied to the index.
28952897
If the function returns a tuple with more than one element
28962898
a MultiIndex will be returned.
28972899
28982900
"""
2899-
from .multi import MultiIndex
2900-
2901-
if isinstance(mapper, ABCSeries):
2902-
indexer = mapper.index.get_indexer(self.values)
2903-
mapped_values = algos.take_1d(mapper.values, indexer)
2904-
elif isinstance(mapper, dict):
2905-
idx = Index(mapper.keys())
2906-
data = idx._get_values_from_dict(mapper)
2907-
indexer = idx.get_indexer(self.values)
2908-
mapped_values = algos.take_1d(data, indexer)
2909-
else:
2910-
mapped_values = self._arrmap(self.values, mapper)
29112901

2902+
from .multi import MultiIndex
2903+
new_values = super(Index, self)._map_values(
2904+
self.values, arg, na_action=na_action)
29122905
attributes = self._get_attributes_dict()
2913-
if mapped_values.size and isinstance(mapped_values[0], tuple):
2914-
return MultiIndex.from_tuples(mapped_values,
2906+
if new_values.size and isinstance(new_values[0], tuple):
2907+
return MultiIndex.from_tuples(new_values,
29152908
names=attributes.get('name'))
29162909

29172910
attributes['copy'] = False
2918-
return Index(mapped_values, **attributes)
2911+
return Index(new_values, **attributes)
29192912

29202913
def isin(self, values, level=None):
29212914
"""

pandas/core/series.py

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2263,43 +2263,13 @@ def map(self, arg, na_action=None):
22632263
3 0
22642264
dtype: int64
22652265
"""
2266-
22672266
if is_extension_type(self.dtype):
2268-
values = self._values
2269-
if na_action is not None:
2270-
raise NotImplementedError
2271-
map_f = lambda values, f: values.map(f)
2267+
input_values = self._values
22722268
else:
2273-
values = self.asobject
2274-
2275-
if na_action == 'ignore':
2276-
def map_f(values, f):
2277-
return lib.map_infer_mask(values, f,
2278-
isna(values).view(np.uint8))
2279-
else:
2280-
map_f = lib.map_infer
2281-
2282-
if isinstance(arg, dict):
2283-
if hasattr(arg, '__missing__'):
2284-
# If a dictionary subclass defines a default value method,
2285-
# convert arg to a lookup function (GH #15999).
2286-
dict_with_default = arg
2287-
arg = lambda x: dict_with_default[x]
2288-
else:
2289-
# Dictionary does not have a default. Thus it's safe to
2290-
# convert to an indexed series for efficiency.
2291-
arg = self._constructor(arg, index=arg.keys())
2292-
2293-
if isinstance(arg, Series):
2294-
# arg is a Series
2295-
indexer = arg.index.get_indexer(values)
2296-
new_values = algorithms.take_1d(arg._values, indexer)
2297-
else:
2298-
# arg is a function
2299-
new_values = map_f(values, arg)
2300-
2301-
return self._constructor(new_values,
2302-
index=self.index).__finalize__(self)
2269+
input_values = self.asobject
2270+
new_values = super(Series, self)._map_values(
2271+
input_values, arg, na_action=na_action)
2272+
return self._constructor(new_values, index=self.index).__finalize__(self)
23032273

23042274
def _gotitem(self, key, ndim, subset=None):
23052275
"""

pandas/tests/indexes/test_base.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
from datetime import datetime, timedelta
66

7+
from collections import defaultdict
8+
79
import pandas.util.testing as tm
810
from pandas.core.indexes.api import Index, MultiIndex
911
from pandas.tests.indexes.common import Base
@@ -867,6 +869,21 @@ def test_map_with_non_function_missing_values(self):
867869
mapper = {0: 'foo', 2: 2.0, -1: 'baz'}
868870
tm.assert_index_equal(expected, input.map(mapper))
869871

872+
def test_map_na_exclusion(self):
873+
idx = Index([1.5, np.nan, 3, np.nan, 5])
874+
875+
result = idx.map(lambda x: x * 2, na_action='ignore')
876+
exp = idx * 2
877+
tm.assert_index_equal(result, exp)
878+
879+
def test_map_defaultdict(self):
880+
idx = Index([1, 2, 3])
881+
default_dict = defaultdict(lambda: 'blank')
882+
default_dict[1] = 'stuff'
883+
result = idx.map(default_dict)
884+
expected = Index(['stuff', 'blank', 'blank'])
885+
tm.assert_index_equal(result, expected)
886+
870887
def test_append_multiple(self):
871888
index = Index(['a', 'b', 'c', 'd', 'e', 'f'])
872889

0 commit comments

Comments
 (0)