@@ -1,24 +1,20 @@
from copy import copy

-from cython import Py_ssize_t
-
-from libc.stdlib cimport free, malloc
-
import numpy as np

cimport numpy as cnp
from numpy cimport int64_t, ndarray

cnp.import_array()

-from pandas._libs cimport util
+from pandas._libs.util cimport is_array

from pandas._libs.lib import is_scalar, maybe_convert_objects


cpdef check_result_array(object obj, Py_ssize_t cnt):

-    if (util.is_array(obj) or
+    if (is_array(obj) or
            (isinstance(obj, list) and len(obj) == cnt) or
            getattr(obj, 'shape', None) == (cnt,)):
        raise ValueError('Must produce aggregated value')
@@ -33,7 +29,7 @@ cdef class _BaseGrouper:
        if (dummy.dtype != self.arr.dtype
                and values.dtype != self.arr.dtype):
            raise ValueError('Dummy array must be same dtype')
-        if util.is_array(values) and not values.flags.contiguous:
+        if is_array(values) and not values.flags.contiguous:
            # e.g. Categorical has no `flags` attribute
            values = values.copy()
        index = dummy.index.values
@@ -106,7 +102,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
        self.f = f

        values = series.values
-        if util.is_array(values) and not values.flags.c_contiguous:
+        if is_array(values) and not values.flags.c_contiguous:
            # e.g. Categorical has no `flags` attribute
            values = values.copy('C')
        self.arr = values
@@ -204,7 +200,7 @@ cdef class SeriesGrouper(_BaseGrouper):
        self.f = f

        values = series.values
-        if util.is_array(values) and not values.flags.c_contiguous:
+        if is_array(values) and not values.flags.c_contiguous:
            # e.g. Categorical has no `flags` attribute
            values = values.copy('C')
        self.arr = values
@@ -288,9 +284,9 @@ cpdef inline extract_result(object res, bint squeeze=True):
        res = res._values
        if squeeze and res.ndim == 1 and len(res) == 1:
            res = res[0]
-    if hasattr(res, 'values') and util.is_array(res.values):
+    if hasattr(res, 'values') and is_array(res.values):
        res = res.values
-    if util.is_array(res):
+    if is_array(res):
        if res.ndim == 0:
            res = res.item()
        elif squeeze and res.ndim == 1 and len(res) == 1:
@@ -304,7 +300,7 @@ cdef class Slider:
    """
    cdef:
        ndarray values, buf
-        Py_ssize_t stride, orig_len, orig_stride
+        Py_ssize_t stride, orig_len
        char *orig_data

    def __init__(self, ndarray values, ndarray buf):
@@ -320,7 +316,6 @@ cdef class Slider:

        self.orig_data = self.buf.data
        self.orig_len = self.buf.shape[0]
-        self.orig_stride = self.buf.strides[0]

        self.buf.data = self.values.data
        self.buf.strides[0] = self.stride
@@ -333,10 +328,8 @@ cdef class Slider:
        self.buf.shape[0] = end - start

    cdef reset(self):
-
-        self.buf.shape[0] = self.orig_len
        self.buf.data = self.orig_data
-        self.buf.strides[0] = self.orig_stride
+        self.buf.shape[0] = self.orig_len


class InvalidApply(Exception):
@@ -346,7 +339,6 @@ class InvalidApply(Exception):
def apply_frame_axis0(object frame, object f, object names,
                      const int64_t[:] starts, const int64_t[:] ends):
    cdef:
-        BlockSlider slider
        Py_ssize_t i, n = len(starts)
        list results
        object piece
@@ -357,124 +349,42 @@ def apply_frame_axis0(object frame, object f, object names,

    results = []

-    slider = BlockSlider(frame)
-
    mutated = False
-    item_cache = slider.dummy._item_cache
-    try:
-        for i in range(n):
-            slider.move(starts[i], ends[i])
+    item_cache = frame._item_cache

-            item_cache.clear()  # ugh
-            chunk = slider.dummy
-            object.__setattr__(chunk, 'name', names[i])
+    for i in range(n):
+        item_cache.clear()  # ugh
+        chunk = frame[starts[i]:ends[i]]
+        object.__setattr__(chunk, 'name', names[i])

-            try:
-                piece = f(chunk)
-            except Exception:
-                # We can't be more specific without knowing something about `f`
-                raise InvalidApply('Let this error raise above us')
+        try:
+            piece = f(chunk)
+        except Exception:
+            # We can't be more specific without knowing something about `f`
+            raise InvalidApply('Let this error raise above us')

-            # Need to infer if low level index slider will cause segfaults
-            require_slow_apply = i == 0 and piece is chunk
+        # Need to infer if low level index slider will cause segfaults
+        require_slow_apply = i == 0 and piece is chunk
+        try:
+            if not piece.index is chunk.index:
+                mutated = True
+        except AttributeError:
+            # `piece` might not have an index, could be e.g. an int
+            pass
+
+        if not is_scalar(piece):
+            # Need to copy data to avoid appending references
            try:
-                if not piece.index is chunk.index:
-                    mutated = True
-            except AttributeError:
-                # `piece` might not have an index, could be e.g. an int
-                pass
-
-            if not is_scalar(piece):
-                # Need to copy data to avoid appending references
-                try:
-                    piece = piece.copy(deep="all")
-                except (TypeError, AttributeError):
-                    piece = copy(piece)
-
-            results.append(piece)
-
-            # If the data was modified inplace we need to
-            # take the slow path to not risk segfaults
-            # we have already computed the first piece
-            if require_slow_apply:
-                break
-    finally:
-        slider.reset()
+                piece = piece.copy(deep="all")
+            except (TypeError, AttributeError):
+                piece = copy(piece)

-    return results, mutated
+        results.append(piece)

+        # If the data was modified inplace we need to
+        # take the slow path to not risk segfaults
+        # we have already computed the first piece
+        if require_slow_apply:
+            break

-cdef class BlockSlider:
-    """
-    Only capable of sliding on axis=0
-    """
-
-    cdef public:
-        object frame, dummy, index
-        int nblocks
-        Slider idx_slider
-        list blocks
-
-    cdef:
-        char **base_ptrs
-
-    def __init__(self, object frame):
-        cdef:
-            Py_ssize_t i
-            object b
-
-        self.frame = frame
-        self.dummy = frame[:0]
-        self.index = self.dummy.index
-
-        self.blocks = [b.values for b in self.dummy._mgr.blocks]
-
-        for x in self.blocks:
-            util.set_array_not_contiguous(x)
-
-        self.nblocks = len(self.blocks)
-        # See the comment in indexes/base.py about _index_data.
-        # We need this for EA-backed indexes that have a reference to a 1-d
-        # ndarray like datetime / timedelta / period.
-        self.idx_slider = Slider(
-            self.frame.index._index_data, self.dummy.index._index_data)
-
-        self.base_ptrs = <char**>malloc(sizeof(char*) * len(self.blocks))
-        for i, block in enumerate(self.blocks):
-            self.base_ptrs[i] = (<ndarray>block).data
-
-    def __dealloc__(self):
-        free(self.base_ptrs)
-
-    cdef move(self, int start, int end):
-        cdef:
-            ndarray arr
-            Py_ssize_t i
-
-        # move blocks
-        for i in range(self.nblocks):
-            arr = self.blocks[i]
-
-            # axis=1 is the frame's axis=0
-            arr.data = self.base_ptrs[i] + arr.strides[1] * start
-            arr.shape[1] = end - start
-
-        # move and set the index
-        self.idx_slider.move(start, end)
-
-        object.__setattr__(self.index, '_index_data', self.idx_slider.buf)
-        self.index._engine.clear_mapping()
-        self.index._cache.clear()  # e.g. inferred_freq must go
-
-    cdef reset(self):
-        cdef:
-            ndarray arr
-            Py_ssize_t i
-
-        # reset blocks
-        for i in range(self.nblocks):
-            arr = self.blocks[i]
-
-            # axis=1 is the frame's axis=0
-            arr.data = self.base_ptrs[i]
-            arr.shape[1] = 0
+    return results, mutated