1
1
from copy import copy
2
2
3
+ from cython import Py_ssize_t
4
+
5
+ from libc.stdlib cimport free, malloc
6
+
3
7
import numpy as np
4
8
5
9
cimport numpy as cnp
6
10
from numpy cimport int64_t, ndarray
7
11
8
12
cnp.import_array()
9
13
10
- from pandas._libs.util cimport is_array
14
+ from pandas._libs cimport util
11
15
12
16
from pandas._libs.lib import is_scalar, maybe_convert_objects
13
17
14
18
15
19
cpdef check_result_array(object obj, Py_ssize_t cnt):
    """
    Raise if ``obj`` looks like a non-aggregated (array-like) result.

    Parameters
    ----------
    obj : object
        Value to check.
    cnt : Py_ssize_t
        Length that a list, or a 1-d ``shape`` attribute, is compared with.

    Raises
    ------
    ValueError
        If ``obj`` is an ndarray, a list of length ``cnt``, or has a
        ``shape`` attribute equal to ``(cnt,)``.
    """
    if (util.is_array(obj) or
            (isinstance(obj, list) and len(obj) == cnt) or
            getattr(obj, 'shape', None) == (cnt,)):
        raise ValueError('Must produce aggregated value')
@@ -29,7 +33,7 @@ cdef class _BaseGrouper:
29
33
if (dummy.dtype != self .arr.dtype
30
34
and values.dtype != self .arr.dtype):
31
35
raise ValueError (' Dummy array must be same dtype' )
32
- if is_array(values) and not values.flags.contiguous:
36
+ if util. is_array(values) and not values.flags.contiguous:
33
37
# e.g. Categorical has no `flags` attribute
34
38
values = values.copy()
35
39
index = dummy.index.values
@@ -102,7 +106,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
102
106
self .f = f
103
107
104
108
values = series.values
105
- if is_array(values) and not values.flags.c_contiguous:
109
+ if util. is_array(values) and not values.flags.c_contiguous:
106
110
# e.g. Categorical has no `flags` attribute
107
111
values = values.copy(' C' )
108
112
self .arr = values
@@ -200,7 +204,7 @@ cdef class SeriesGrouper(_BaseGrouper):
200
204
self .f = f
201
205
202
206
values = series.values
203
- if is_array(values) and not values.flags.c_contiguous:
207
+ if util. is_array(values) and not values.flags.c_contiguous:
204
208
# e.g. Categorical has no `flags` attribute
205
209
values = values.copy(' C' )
206
210
self .arr = values
@@ -284,9 +288,9 @@ cpdef inline extract_result(object res, bint squeeze=True):
284
288
res = res._values
285
289
if squeeze and res.ndim == 1 and len (res) == 1 :
286
290
res = res[0 ]
287
- if hasattr (res, ' values' ) and is_array(res.values):
291
+ if hasattr (res, ' values' ) and util. is_array(res.values):
288
292
res = res.values
289
- if is_array(res):
293
+ if util. is_array(res):
290
294
if res.ndim == 0 :
291
295
res = res.item()
292
296
elif squeeze and res.ndim == 1 and len (res) == 1 :
@@ -300,7 +304,7 @@ cdef class Slider:
300
304
"""
301
305
cdef:
302
306
ndarray values, buf
303
- Py_ssize_t stride, orig_len
307
+ Py_ssize_t stride, orig_len, orig_stride
304
308
char * orig_data
305
309
306
310
def __init__ (self , ndarray values , ndarray buf ):
@@ -316,6 +320,7 @@ cdef class Slider:
316
320
317
321
self .orig_data = self .buf.data
318
322
self .orig_len = self .buf.shape[0 ]
323
+ self .orig_stride = self .buf.strides[0 ]
319
324
320
325
self .buf.data = self .values.data
321
326
self .buf.strides[0 ] = self .stride
@@ -329,8 +334,9 @@ cdef class Slider:
329
334
330
335
cdef reset(self ):
331
336
332
- self .buf.data = self .orig_data
333
337
self .buf.shape[0 ] = self .orig_len
338
+ self .buf.data = self .orig_data
339
+ self .buf.strides[0 ] = self .orig_stride
334
340
335
341
336
342
class InvalidApply (Exception ):
@@ -340,6 +346,7 @@ class InvalidApply(Exception):
340
346
def apply_frame_axis0 (object frame , object f , object names ,
341
347
const int64_t[:] starts , const int64_t[:] ends ):
342
348
cdef:
349
+ BlockSlider slider
343
350
Py_ssize_t i, n = len (starts)
344
351
list results
345
352
object piece
@@ -350,13 +357,16 @@ def apply_frame_axis0(object frame, object f, object names,
350
357
351
358
results = []
352
359
360
+ slider = BlockSlider(frame)
361
+
353
362
mutated = False
354
- item_cache = frame ._item_cache
363
+ item_cache = slider.dummy ._item_cache
355
364
try :
356
365
for i in range (n):
366
+ slider.move(starts[i], ends[i])
357
367
358
368
item_cache.clear() # ugh
359
- chunk = frame[starts[i]:ends[i]]
369
+ chunk = slider.dummy
360
370
object .__setattr__ (chunk, ' name' , names[i])
361
371
362
372
try :
@@ -389,6 +399,82 @@ def apply_frame_axis0(object frame, object f, object names,
389
399
if require_slow_apply:
390
400
break
391
401
finally :
392
- pass
402
+ slider.reset()
393
403
394
404
return results, mutated
405
+
406
+
407
cdef class BlockSlider:
    """
    Slide a dummy frame along the rows of ``frame`` without re-slicing it.

    Only capable of sliding on axis=0.

    ``dummy`` starts out as ``frame[:0]``.  Each call to ``move`` rewrites
    the data pointer and length of the arrays backing ``dummy`` (its block
    values and its index data), and ``reset`` puts the block arrays back to
    their zero-length starting state.
    """

    cdef public:
        object frame, dummy, index
        int nblocks
        Slider idx_slider
        list blocks

    cdef:
        # Data pointer of each block array as captured in __init__;
        # allocated with malloc there and released in __dealloc__.
        char **base_ptrs

    def __init__(self, object frame):
        cdef:
            Py_ssize_t i
            object b

        self.frame = frame
        self.dummy = frame[:0]
        self.index = self.dummy.index

        self.blocks = [b.values for b in self.dummy._mgr.blocks]

        for x in self.blocks:
            util.set_array_not_contiguous(x)

        self.nblocks = len(self.blocks)
        # See the comment in indexes/base.py about _index_data.
        # We need this for EA-backed indexes that have a reference to a 1-d
        # ndarray like datetime / timedelta / period.
        self.idx_slider = Slider(
            self.frame.index._index_data, self.dummy.index._index_data)

        self.base_ptrs = <char**>malloc(sizeof(char*) * self.nblocks)
        # malloc(0) is allowed to return NULL, so only treat NULL as an
        # allocation failure when there is something to store.
        if self.base_ptrs == NULL and self.nblocks > 0:
            raise MemoryError()
        for i, block in enumerate(self.blocks):
            self.base_ptrs[i] = (<ndarray>block).data

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe even if __init__ raised
        # before base_ptrs was allocated.
        free(self.base_ptrs)

    cdef move(self, int start, int end):
        """
        Point ``dummy`` at rows ``start:end`` of ``frame``.
        """
        cdef:
            ndarray arr
            Py_ssize_t i

        # move blocks
        for i in range(self.nblocks):
            arr = self.blocks[i]

            # axis=1 is the frame's axis=0
            arr.data = self.base_ptrs[i] + arr.strides[1] * start
            arr.shape[1] = end - start

        # move and set the index
        self.idx_slider.move(start, end)

        object.__setattr__(self.index, '_index_data', self.idx_slider.buf)
        self.index._engine.clear_mapping()
        self.index._cache.clear()  # e.g. inferred_freq must go

    cdef reset(self):
        """
        Restore every block array to its original pointer and zero length.
        """
        cdef:
            ndarray arr
            Py_ssize_t i

        # NOTE(review): only the block arrays are restored here; idx_slider
        # is left wherever the last move() put it -- confirm whether
        # idx_slider.reset() should be called as well.

        # reset blocks
        for i in range(self.nblocks):
            arr = self.blocks[i]

            # axis=1 is the frame's axis=0
            arr.data = self.base_ptrs[i]
            arr.shape[1] = 0
0 commit comments