Skip to content

Commit 184feb7

Browse files
committed
Improve dictionary map performance on category series, fixes pandas-dev#23785
1 parent 181f972 commit 184feb7

File tree

3 files changed

+22
-7
lines changed

3 files changed

+22
-7
lines changed

asv_bench/benchmarks/series_methods.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -145,16 +145,25 @@ def time_searchsorted(self, dtype):
145145

146146
class Map(object):
147147

148-
params = ['dict', 'Series']
148+
params = (['dict', 'Series', 'lambda'], ['object', 'category', 'int'])
149149
param_names = 'mapper'
150150

151-
def setup(self, mapper):
151+
def setup(self, mapper, dtype):
152152
map_size = 1000
153-
map_data = Series(map_size - np.arange(map_size))
154-
self.map_data = map_data if mapper == 'Series' else map_data.to_dict()
155-
self.s = Series(np.random.randint(0, map_size, 10000))
153+
map_data = Series(map_size - np.arange(map_size), dtype=dtype)
156154

157-
def time_map(self, mapper):
155+
# construct mapper
156+
if mapper == 'Series':
157+
self.map_data = map_data
158+
elif mapper == 'lambda':
159+
self.map_data = map_data.to_dict()
160+
else:
161+
map_dict = map_data.to_dict()
162+
self.map_data = lambda x: map_dict[x]
163+
164+
self.s = Series(np.random.randint(0, map_size, 10000), dtype=dtype)
165+
166+
def time_map(self, mapper, *args, **kwargs):
158167
self.s.map(self.map_data)
159168

160169

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ Performance Improvements
248248
- Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
249249
- Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
250250
- Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
251+
- Improved performance of :meth:`Series.map` for dictionary mappers on categorical series by mapping the categories instead of mapping all values (:issue:`23785`)
251252

252253
.. _whatsnew_0250.bug_fixes:
253254

pandas/core/base.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from pandas.core.dtypes.common import (
2020
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimelike,
2121
is_extension_array_dtype, is_extension_type, is_list_like, is_object_dtype,
22-
is_scalar, is_timedelta64_ns_dtype)
22+
is_scalar, is_timedelta64_ns_dtype, is_categorical_dtype)
2323
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
2424
from pandas.core.dtypes.missing import isna
2525

@@ -1201,6 +1201,11 @@ def _map_values(self, mapper, na_action=None):
12011201
else:
12021202
values = self.values
12031203

1204+
if is_categorical_dtype(values):
1205+
# use the built-in categorical series mapper, which saves
1206+
# time by mapping the categories instead of all values
1207+
return values.map(mapper)
1208+
12041209
indexer = mapper.index.get_indexer(values)
12051210
new_values = algorithms.take_1d(mapper._values, indexer)
12061211

0 commit comments

Comments
 (0)