Skip to content

Commit f2ab033

Browse files
jbrockmendel authored and proost committed
REF: pre-allocate results in libreduction (pandas-dev#29550)
1 parent 811e4fc commit f2ab033

File tree

1 file changed

+22
-22
lines changed

1 file changed

+22
-22
lines changed

pandas/_libs/reduction.pyx

+22 -22
Original file line number | Diff line number | Diff line change
@@ -18,15 +18,13 @@ cimport pandas._libs.util as util
1818
from pandas._libs.lib import maybe_convert_objects
1919

2020

21-
cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):
21+
cdef _check_result_array(object obj, Py_ssize_t cnt):
2222

2323
if (util.is_array(obj) or
2424
(isinstance(obj, list) and len(obj) == cnt) or
2525
getattr(obj, 'shape', None) == (cnt,)):
2626
raise ValueError('Function does not reduce')
2727

28-
return np.empty(size, dtype='O')
29-
3028

3129
cdef bint _is_sparse_array(object obj):
3230
# TODO: can be removed once SparseArray.values is removed (GH26421)
@@ -116,6 +114,9 @@ cdef class Reducer:
116114
has_index = self.index is not None
117115
incr = self.increment
118116

117+
result = np.empty(self.nresults, dtype='O')
118+
it = <flatiter>PyArray_IterNew(result)
119+
119120
try:
120121
for i in range(self.nresults):
121122

@@ -158,10 +159,9 @@ cdef class Reducer:
158159
and util.is_array(res.values)):
159160
res = res.values
160161
if i == 0:
161-
result = _get_result_array(res,
162-
self.nresults,
163-
len(self.dummy))
164-
it = <flatiter>PyArray_IterNew(result)
162+
# On the first pass, we check the output shape to see
163+
# if this looks like a reduction.
164+
_check_result_array(res, len(self.dummy))
165165

166166
PyArray_SETITEM(result, PyArray_ITER_DATA(it), res)
167167
chunk.data = chunk.data + self.increment
@@ -170,9 +170,7 @@ cdef class Reducer:
170170
# so we don't free the wrong memory
171171
chunk.data = dummy_buf
172172

173-
if result.dtype == np.object_:
174-
result = maybe_convert_objects(result)
175-
173+
result = maybe_convert_objects(result)
176174
return result
177175

178176

@@ -275,6 +273,8 @@ cdef class SeriesBinGrouper(_BaseGrouper):
275273
vslider = Slider(self.arr, self.dummy_arr)
276274
islider = Slider(self.index, self.dummy_index)
277275

276+
result = np.empty(self.ngroups, dtype='O')
277+
278278
try:
279279
for i in range(self.ngroups):
280280
group_size = counts[i]
@@ -289,10 +289,11 @@ cdef class SeriesBinGrouper(_BaseGrouper):
289289
res = self.f(cached_typ)
290290
res = _extract_result(res)
291291
if not initialized:
292+
# On the first pass, we check the output shape to see
293+
# if this looks like a reduction.
292294
initialized = 1
293-
result = _get_result_array(res,
294-
self.ngroups,
295-
len(self.dummy_arr))
295+
_check_result_array(res, len(self.dummy_arr))
296+
296297
result[i] = res
297298

298299
islider.advance(group_size)
@@ -303,9 +304,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
303304
islider.reset()
304305
vslider.reset()
305306

306-
if result.dtype == np.object_:
307-
result = maybe_convert_objects(result)
308-
307+
result = maybe_convert_objects(result)
309308
return result, counts
310309

311310

@@ -368,6 +367,8 @@ cdef class SeriesGrouper(_BaseGrouper):
368367
vslider = Slider(self.arr, self.dummy_arr)
369368
islider = Slider(self.index, self.dummy_index)
370369

370+
result = np.empty(self.ngroups, dtype='O')
371+
371372
try:
372373
for i in range(n):
373374
group_size += 1
@@ -391,10 +392,10 @@ cdef class SeriesGrouper(_BaseGrouper):
391392
res = self.f(cached_typ)
392393
res = _extract_result(res)
393394
if not initialized:
395+
# On the first pass, we check the output shape to see
396+
# if this looks like a reduction.
394397
initialized = 1
395-
result = _get_result_array(res,
396-
self.ngroups,
397-
len(self.dummy_arr))
398+
_check_result_array(res, len(self.dummy_arr))
398399

399400
result[lab] = res
400401
counts[lab] = group_size
@@ -410,10 +411,9 @@ cdef class SeriesGrouper(_BaseGrouper):
410411

411412
# We check for empty series in the constructor, so should always
412413
# have result initialized by this point.
413-
assert result is not None, "`result` has not been assigned."
414+
assert initialized, "`result` has not been initialized."
414415

415-
if result.dtype == np.object_:
416-
result = maybe_convert_objects(result)
416+
result = maybe_convert_objects(result)
417417

418418
return result, counts
419419

0 commit comments

Comments
 (0)