@@ -27,11 +27,11 @@ from pandas._libs.lib import (
27
27
)
28
28
29
29
30
- cpdef check_result_array(object obj):
30
+ cpdef check_result_array(object obj, Py_ssize_t cnt ):
31
31
32
32
if (is_array(obj) or
33
- (isinstance (obj, list ) and len (obj) == 0 ) or
34
- getattr (obj, ' shape' , None ) == (0 ,)):
33
+ (isinstance (obj, list ) and len (obj) == cnt ) or
34
+ getattr (obj, ' shape' , None ) == (cnt ,)):
35
35
raise ValueError (' Must produce aggregated value' )
36
36
37
37
@@ -53,43 +53,45 @@ cdef class _BaseGrouper:
53
53
54
54
return values, index
55
55
56
- cdef _init_dummy_series_and_index(self , Slider islider, Slider vslider):
57
- """
58
- Create Series and Index objects that we will alter in-place while iterating.
59
- """
60
- cached_index = self .ityp(islider.buf, dtype = self .idtype)
61
- cached_series = self .typ(
62
- vslider.buf, dtype = vslider.buf.dtype, index = cached_index, name = self .name
63
- )
64
- return cached_index, cached_series
65
-
66
- cdef inline _update_cached_objs(self , object cached_series, object cached_index,
56
+ cdef inline _update_cached_objs(self , object cached_typ, object cached_ityp,
67
57
Slider islider, Slider vslider):
68
- # See the comment in indexes/base.py about _index_data.
69
- # We need this for EA-backed indexes that have a reference
70
- # to a 1-d ndarray like datetime / timedelta / period.
71
- cached_index._engine.clear_mapping()
72
- cached_index._cache.clear() # e.g. inferred_freq must go
73
- cached_series._mgr.set_values(vslider.buf)
58
+ if cached_typ is None :
59
+ cached_ityp = self .ityp(islider.buf, dtype = self .idtype)
60
+ cached_typ = self .typ(
61
+ vslider.buf, dtype = vslider.buf.dtype, index = cached_ityp, name = self .name
62
+ )
63
+ else :
64
+ # See the comment in indexes/base.py about _index_data.
65
+ # We need this for EA-backed indexes that have a reference
66
+ # to a 1-d ndarray like datetime / timedelta / period.
67
+ object .__setattr__ (cached_ityp, ' _index_data' , islider.buf)
68
+ cached_ityp._engine.clear_mapping()
69
+ cached_ityp._cache.clear() # e.g. inferred_freq must go
70
+ cached_typ._mgr.set_values(vslider.buf)
71
+ object .__setattr__ (cached_typ, ' _index' , cached_ityp)
72
+ object .__setattr__ (cached_typ, ' name' , self .name)
73
+ return cached_typ, cached_ityp
74
74
75
75
cdef inline object _apply_to_group(self ,
76
- object cached_series , object cached_index ,
76
+ object cached_typ , object cached_ityp ,
77
77
bint initialized):
78
78
"""
79
79
Call self.f on our new group, then update to the next group.
80
80
"""
81
81
cdef:
82
82
object res
83
83
84
- # NB: we assume that _update_cached_objs has already cleared cleared
85
- # the cache and engine mapping
86
- res = self .f(cached_series )
84
+ cached_ityp._engine.clear_mapping()
85
+ cached_ityp._cache.clear() # e.g. inferred_freq must go
86
+ res = self .f(cached_typ )
87
87
res = extract_result(res)
88
88
if not initialized:
89
89
# On the first pass, we check the output shape to see
90
90
# if this looks like a reduction.
91
91
initialized = True
92
- check_result_array(res)
92
+ # In all tests other than test_series_grouper and
93
+ # test_series_bin_grouper, we have len(self.dummy_arr) == 0
94
+ check_result_array(res, len (self .dummy_arr))
93
95
94
96
return res, initialized
95
97
@@ -140,7 +142,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
140
142
object res
141
143
bint initialized = 0
142
144
Slider vslider, islider
143
- object cached_series = None , cached_index = None
145
+ object cached_typ = None , cached_ityp = None
144
146
145
147
counts = np.zeros(self .ngroups, dtype = np.int64)
146
148
@@ -160,10 +162,6 @@ cdef class SeriesBinGrouper(_BaseGrouper):
160
162
161
163
result = np.empty(self .ngroups, dtype = ' O' )
162
164
163
- cached_index, cached_series = self ._init_dummy_series_and_index(
164
- islider, vslider
165
- )
166
-
167
165
start = 0
168
166
try :
169
167
for i in range (self .ngroups):
@@ -173,10 +171,10 @@ cdef class SeriesBinGrouper(_BaseGrouper):
173
171
islider.move(start, end)
174
172
vslider.move(start, end)
175
173
176
- self ._update_cached_objs(
177
- cached_series, cached_index , islider, vslider)
174
+ cached_typ, cached_ityp = self ._update_cached_objs(
175
+ cached_typ, cached_ityp , islider, vslider)
178
176
179
- res, initialized = self ._apply_to_group(cached_series, cached_index ,
177
+ res, initialized = self ._apply_to_group(cached_typ, cached_ityp ,
180
178
initialized)
181
179
start += group_size
182
180
@@ -238,7 +236,7 @@ cdef class SeriesGrouper(_BaseGrouper):
238
236
object res
239
237
bint initialized = 0
240
238
Slider vslider, islider
241
- object cached_series = None , cached_index = None
239
+ object cached_typ = None , cached_ityp = None
242
240
243
241
labels = self .labels
244
242
counts = np.zeros(self .ngroups, dtype = np.int64)
@@ -250,10 +248,6 @@ cdef class SeriesGrouper(_BaseGrouper):
250
248
251
249
result = np.empty(self .ngroups, dtype = ' O' )
252
250
253
- cached_index, cached_series = self ._init_dummy_series_and_index(
254
- islider, vslider
255
- )
256
-
257
251
start = 0
258
252
try :
259
253
for i in range (n):
@@ -271,10 +265,10 @@ cdef class SeriesGrouper(_BaseGrouper):
271
265
islider.move(start, end)
272
266
vslider.move(start, end)
273
267
274
- self ._update_cached_objs(
275
- cached_series, cached_index , islider, vslider)
268
+ cached_typ, cached_ityp = self ._update_cached_objs(
269
+ cached_typ, cached_ityp , islider, vslider)
276
270
277
- res, initialized = self ._apply_to_group(cached_series, cached_index ,
271
+ res, initialized = self ._apply_to_group(cached_typ, cached_ityp ,
278
272
initialized)
279
273
280
274
start += group_size
@@ -297,20 +291,20 @@ cdef class SeriesGrouper(_BaseGrouper):
297
291
return result, counts
298
292
299
293
300
- cpdef inline extract_result(object res):
294
+ cpdef inline extract_result(object res, bint squeeze = True ):
301
295
""" extract the result object, it might be a 0-dim ndarray
302
296
or a len-1 0-dim, or a scalar """
303
297
if hasattr (res, " _values" ):
304
298
# Preserve EA
305
299
res = res._values
306
- if res.ndim == 1 and len (res) == 1 :
300
+ if squeeze and res.ndim == 1 and len (res) == 1 :
307
301
res = res[0 ]
308
302
if hasattr (res, ' values' ) and is_array(res.values):
309
303
res = res.values
310
304
if is_array(res):
311
305
if res.ndim == 0 :
312
306
res = res.item()
313
- elif res.ndim == 1 and len (res) == 1 :
307
+ elif squeeze and res.ndim == 1 and len (res) == 1 :
314
308
res = res[0 ]
315
309
return res
316
310
@@ -495,6 +489,6 @@ cdef class BlockSlider:
495
489
Ensure that we have the original blocks, blknos, and blklocs.
496
490
"""
497
491
mgr = self .dummy._mgr
498
- mgr.blocks = tuple ( self .blocks)
492
+ mgr.blocks = self .blocks
499
493
mgr._blklocs = self .orig_blklocs
500
494
mgr._blknos = self .orig_blknos
0 commit comments