Skip to content

Commit fc1fe49

Browse files
committed
Revert "BLD: remove blockslider pandas-dev#34014"
This reverts commit 687a0ce.
1 parent 687a0ce commit fc1fe49

File tree

1 file changed: 98 additions and 12 deletions.

1 file changed: 98 additions and 12 deletions.

pandas/_libs/reduction.pyx

+98-12
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
from copy import copy
22

3+
from cython import Py_ssize_t
4+
5+
from libc.stdlib cimport free, malloc
6+
37
import numpy as np
48

59
cimport numpy as cnp
610
from numpy cimport int64_t, ndarray
711

812
cnp.import_array()
913

10-
from pandas._libs.util cimport is_array
14+
from pandas._libs cimport util
1115

1216
from pandas._libs.lib import is_scalar, maybe_convert_objects
1317

1418

1519
cpdef check_result_array(object obj, Py_ssize_t cnt):
1620

17-
if (is_array(obj) or
21+
if (util.is_array(obj) or
1822
(isinstance(obj, list) and len(obj) == cnt) or
1923
getattr(obj, 'shape', None) == (cnt,)):
2024
raise ValueError('Must produce aggregated value')
@@ -29,7 +33,7 @@ cdef class _BaseGrouper:
2933
if (dummy.dtype != self.arr.dtype
3034
and values.dtype != self.arr.dtype):
3135
raise ValueError('Dummy array must be same dtype')
32-
if is_array(values) and not values.flags.contiguous:
36+
if util.is_array(values) and not values.flags.contiguous:
3337
# e.g. Categorical has no `flags` attribute
3438
values = values.copy()
3539
index = dummy.index.values
@@ -102,7 +106,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
102106
self.f = f
103107

104108
values = series.values
105-
if is_array(values) and not values.flags.c_contiguous:
109+
if util.is_array(values) and not values.flags.c_contiguous:
106110
# e.g. Categorical has no `flags` attribute
107111
values = values.copy('C')
108112
self.arr = values
@@ -200,7 +204,7 @@ cdef class SeriesGrouper(_BaseGrouper):
200204
self.f = f
201205

202206
values = series.values
203-
if is_array(values) and not values.flags.c_contiguous:
207+
if util.is_array(values) and not values.flags.c_contiguous:
204208
# e.g. Categorical has no `flags` attribute
205209
values = values.copy('C')
206210
self.arr = values
@@ -284,9 +288,9 @@ cpdef inline extract_result(object res, bint squeeze=True):
284288
res = res._values
285289
if squeeze and res.ndim == 1 and len(res) == 1:
286290
res = res[0]
287-
if hasattr(res, 'values') and is_array(res.values):
291+
if hasattr(res, 'values') and util.is_array(res.values):
288292
res = res.values
289-
if is_array(res):
293+
if util.is_array(res):
290294
if res.ndim == 0:
291295
res = res.item()
292296
elif squeeze and res.ndim == 1 and len(res) == 1:
@@ -300,7 +304,7 @@ cdef class Slider:
300304
"""
301305
cdef:
302306
ndarray values, buf
303-
Py_ssize_t stride, orig_len
307+
Py_ssize_t stride, orig_len, orig_stride
304308
char *orig_data
305309

306310
def __init__(self, ndarray values, ndarray buf):
@@ -316,6 +320,7 @@ cdef class Slider:
316320

317321
self.orig_data = self.buf.data
318322
self.orig_len = self.buf.shape[0]
323+
self.orig_stride = self.buf.strides[0]
319324

320325
self.buf.data = self.values.data
321326
self.buf.strides[0] = self.stride
@@ -329,8 +334,9 @@ cdef class Slider:
329334

330335
cdef reset(self):
331336

332-
self.buf.data = self.orig_data
333337
self.buf.shape[0] = self.orig_len
338+
self.buf.data = self.orig_data
339+
self.buf.strides[0] = self.orig_stride
334340

335341

336342
class InvalidApply(Exception):
@@ -340,6 +346,7 @@ class InvalidApply(Exception):
340346
def apply_frame_axis0(object frame, object f, object names,
341347
const int64_t[:] starts, const int64_t[:] ends):
342348
cdef:
349+
BlockSlider slider
343350
Py_ssize_t i, n = len(starts)
344351
list results
345352
object piece
@@ -350,13 +357,16 @@ def apply_frame_axis0(object frame, object f, object names,
350357

351358
results = []
352359

360+
slider = BlockSlider(frame)
361+
353362
mutated = False
354-
item_cache = frame._item_cache
363+
item_cache = slider.dummy._item_cache
355364
try:
356365
for i in range(n):
366+
slider.move(starts[i], ends[i])
357367

358368
item_cache.clear() # ugh
359-
chunk = frame[starts[i]:ends[i]]
369+
chunk = slider.dummy
360370
object.__setattr__(chunk, 'name', names[i])
361371

362372
try:
@@ -389,6 +399,82 @@ def apply_frame_axis0(object frame, object f, object names,
389399
if require_slow_apply:
390400
break
391401
finally:
392-
pass
402+
slider.reset()
393403

394404
return results, mutated
405+
406+
407+
cdef class BlockSlider:
    """
    Slide a dummy frame over row windows of ``frame`` without re-slicing.

    Only capable of sliding on axis=0.

    ``dummy`` starts as ``frame[:0]``.  ``move(start, end)`` rewrites the
    data pointer and the axis-1 length of each of the dummy's block arrays
    (and of the dummy index buffer, via ``idx_slider``) in place, so that
    ``dummy`` exposes rows ``start:end``.  ``reset()`` puts the block arrays
    back to their initial pointer and zero length.

    Attributes
    ----------
    frame : object
        The frame passed to the constructor.
    dummy : object
        ``frame[:0]``; the object whose arrays are mutated in place.
    index : object
        ``dummy.index``.
    nblocks : int
        Number of block arrays in ``blocks``.
    idx_slider : Slider
        Slider built from ``frame.index._index_data`` and
        ``dummy.index._index_data``.
    blocks : list
        ``b.values`` for each block of ``dummy._mgr``.
    """

    cdef public:
        object frame, dummy, index
        int nblocks
        Slider idx_slider
        list blocks

    cdef:
        # One saved base data pointer per entry of ``blocks``; allocated with
        # malloc in __init__ and released in __dealloc__.
        char **base_ptrs

    def __init__(self, object frame):
        """
        Build the dummy frame and record each block array's base pointer.

        Parameters
        ----------
        frame : object
            Object supporting ``frame[:0]``, ``.index`` and ``._mgr.blocks``.

        Raises
        ------
        MemoryError
            If the pointer table cannot be allocated.
        """
        cdef:
            Py_ssize_t i
            object b

        self.frame = frame
        self.dummy = frame[:0]
        self.index = self.dummy.index

        self.blocks = [b.values for b in self.dummy._mgr.blocks]

        for x in self.blocks:
            util.set_array_not_contiguous(x)

        self.nblocks = len(self.blocks)
        # See the comment in indexes/base.py about _index_data.
        # We need this for EA-backed indexes that have a reference to a 1-d
        # ndarray like datetime / timedelta / period.
        self.idx_slider = Slider(
            self.frame.index._index_data, self.dummy.index._index_data)

        self.base_ptrs = <char**>malloc(sizeof(char*) * self.nblocks)
        # malloc may legitimately return NULL for a zero-byte request, so
        # only treat NULL as failure when there is something to store.
        if self.base_ptrs == NULL and self.nblocks > 0:
            raise MemoryError()

        for i, block in enumerate(self.blocks):
            self.base_ptrs[i] = (<ndarray>block).data

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe even if __init__ failed
        # before (or during) the allocation.
        free(self.base_ptrs)

    cdef move(self, int start, int end):
        """
        Point the dummy frame at rows ``start:end`` of the original frame.
        """
        # NOTE(review): start/end are C ``int`` while the caller's bounds
        # come from an int64 memoryview; consider widening to Py_ssize_t --
        # confirm no other declaration of this method exists first.
        cdef:
            ndarray arr
            Py_ssize_t i

        # move blocks
        for i in range(self.nblocks):
            arr = self.blocks[i]

            # axis=1 is the frame's axis=0
            arr.data = self.base_ptrs[i] + arr.strides[1] * start
            arr.shape[1] = end - start

        # move and set the index
        self.idx_slider.move(start, end)

        object.__setattr__(self.index, '_index_data', self.idx_slider.buf)
        self.index._engine.clear_mapping()
        self.index._cache.clear()  # e.g. inferred_freq must go

    cdef reset(self):
        """
        Restore every block array to its base pointer with zero length.
        """
        # NOTE(review): the index slider is not reset here; verify whether
        # ``self.idx_slider.reset()`` should also be called.
        cdef:
            ndarray arr
            Py_ssize_t i

        # reset blocks
        for i in range(self.nblocks):
            arr = self.blocks[i]

            # axis=1 is the frame's axis=0
            arr.data = self.base_ptrs[i]
            arr.shape[1] = 0

0 commit comments

Comments
 (0)