Skip to content

Commit e5c933b

Browse files
committed
ENH: add map_infer function, use to speed up Series.map/apply, close PR #355
1 parent 3b45789 commit e5c933b

File tree

3 files changed

+40
-7
lines changed

3 files changed

+40
-7
lines changed

RELEASE.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ pandas 0.5.1
5050
- Add `orient` option to `Panel.from_dict` to ease creation of mixed-type
5151
Panels (GH #359)
5252
- Add `DataFrame.from_dict` with similar `orient` option
53-
- Can pass list of tuples or list of lists to `DataFrame.from_records` for
54-
fast conversion to DataFrame (GH #357)
53+
- Can now pass list of tuples or list of lists to `DataFrame.from_records`
54+
for fast conversion to DataFrame (GH #357)
5555
5656
**Improvements to existing features**
5757

@@ -72,6 +72,9 @@ pandas 0.5.1
7272
- Raise exception if dateutil 2.0 installed on Python 2.x runtime (GH #346)
7373
- Significant GroupBy performance enhancement with multiple keys with many
7474
"empty" combinations
75+
- New Cython vectorized function `map_infer` speeds up `Series.apply` and
76+
`Series.map` significantly when passed an elementwise Python function,
77+
motivated by PR #355
7578

7679
**Bug fixes**
7780

@@ -114,6 +117,7 @@ Thanks
114117
- Jev Kuznetsov
115118
- Dieter Vandenbussche
116119
- rsamson
120+
- Aman Thakral
117121

118122
pandas 0.5.0
119123
============

pandas/core/series.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,8 +1331,8 @@ def map(self, arg):
13311331
new_values = common.take_1d(np.asarray(arg), indexer)
13321332
return Series(new_values, index=self.index, name=self.name)
13331333
else:
1334-
return Series([arg(x) for x in self], index=self.index,
1335-
name=self.name)
1334+
mapped = lib.map_infer(self.values, arg)
1335+
return Series(mapped, index=self.index, name=self.name)
13361336

13371337
def apply(self, func):
13381338
"""
@@ -1353,8 +1353,8 @@ def apply(self, func):
13531353
result = Series(result, index=self.index, name=self.name)
13541354
return result
13551355
except Exception:
1356-
return Series([func(x) for x in self], index=self.index,
1357-
name=self.name)
1356+
mapped = lib.map_infer(self.values, func)
1357+
return Series(mapped, index=self.index, name=self.name)
13581358

13591359
def align(self, other, join='outer', copy=True):
13601360
"""

pandas/src/tseries.pyx

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def ismember(ndarray arr, set values):
186186
it = <flatiter> PyArray_IterNew(arr)
187187
n = len(arr)
188188
result = np.empty(n, dtype=np.uint8)
189-
for i from 0 <= i < n:
189+
for i in range(n):
190190
val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it))
191191
if val in values:
192192
result[i] = 1
@@ -196,6 +196,35 @@ def ismember(ndarray arr, set values):
196196

197197
return result.view(np.bool_)
198198

199+
def map_infer(ndarray arr, object f):
200+
'''
201+
Substitute for np.vectorize with pandas-friendly dtype inference
202+
203+
Parameters
204+
----------
205+
arr : ndarray
206+
f : function
207+
208+
Returns
209+
-------
210+
mapped : ndarray
211+
'''
212+
cdef:
213+
Py_ssize_t i, n
214+
flatiter it
215+
ndarray[object] result
216+
object val
217+
218+
it = <flatiter> PyArray_IterNew(arr)
219+
n = len(arr)
220+
result = np.empty(n, dtype=object)
221+
for i in range(n):
222+
val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it))
223+
result[i] = f(val)
224+
PyArray_ITER_NEXT(it)
225+
226+
return maybe_convert_objects(result)
227+
199228
#----------------------------------------------------------------------
200229
# datetime / io related
201230

0 commit comments

Comments
 (0)