Skip to content

Commit 80ab011

Browse files
committed
Improve dictionary map performance on category series, fixes pandas-dev#23785
1 parent 181f972 commit 80ab011

File tree

2 files changed

+20
-7
lines changed

2 files changed

+20
-7
lines changed

asv_bench/benchmarks/series_methods.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -145,16 +145,25 @@ def time_searchsorted(self, dtype):
145145

146146
class Map(object):
    """Benchmark ``Series.map`` across mapper kinds and Series dtypes."""

    # Two parameter lists: the kind of mapper handed to ``Series.map`` and
    # the dtype of both the mapped Series and the mapping data.
    params = (['dict', 'Series', 'lambda'], ['object', 'category', 'int'])
    # One display name per entry in ``params`` (must be a list, not a str).
    param_names = ['mapper', 'dtype']

    def setup(self, mapper, dtype):
        """Build the mapper object and the Series it is applied to.

        Parameters
        ----------
        mapper : {'dict', 'Series', 'lambda'}
            Which kind of object is stored in ``self.map_data``.
        dtype : str
            dtype used for both the mapping data and the benchmarked Series.

        Raises
        ------
        NotImplementedError
            If ``mapper`` is not one of the supported kinds.
        """
        map_size = 1000
        map_data = Series(map_size - np.arange(map_size), dtype=dtype)

        # construct mapper
        if mapper == 'Series':
            self.map_data = map_data
        elif mapper == 'dict':
            self.map_data = map_data.to_dict()
        elif mapper == 'lambda':
            # Bind the dict once so the callable only does a lookup.
            map_dict = map_data.to_dict()
            self.map_data = lambda x: map_dict[x]
        else:
            raise NotImplementedError(
                'unsupported mapper: {!r}'.format(mapper))

        self.s = Series(np.random.randint(0, map_size, 10000), dtype=dtype)

    def time_map(self, mapper, *args, **kwargs):
        """Time ``Series.map`` with the mapper prepared by ``setup``.

        The remaining benchmark parameters (``dtype``) are accepted but
        unused; all state comes from ``setup``.
        """
        self.s.map(self.map_data)
159168

160169

pandas/core/base.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from pandas.core.dtypes.common import (
2020
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimelike,
2121
is_extension_array_dtype, is_extension_type, is_list_like, is_object_dtype,
22-
is_scalar, is_timedelta64_ns_dtype)
22+
is_scalar, is_timedelta64_ns_dtype, is_categorical_dtype)
2323
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
2424
from pandas.core.dtypes.missing import isna
2525

@@ -1201,6 +1201,10 @@ def _map_values(self, mapper, na_action=None):
12011201
else:
12021202
values = self.values
12031203

1204+
# only need to map the categories not all values
1205+
if is_categorical_dtype(values):
1206+
return values.map(mapper)
1207+
12041208
indexer = mapper.index.get_indexer(values)
12051209
new_values = algorithms.take_1d(mapper._values, indexer)
12061210

0 commit comments

Comments
 (0)