Commit a82d377

BLD: remove blockslider pandas-dev#34014
1 parent 345efdd commit a82d377

1 file changed: +40 -130 lines changed


pandas/_libs/reduction.pyx

+40 -130
@@ -1,24 +1,20 @@
 from copy import copy
 
-from cython import Py_ssize_t
-
-from libc.stdlib cimport free, malloc
-
 import numpy as np
 
 cimport numpy as cnp
 from numpy cimport int64_t, ndarray
 
 cnp.import_array()
 
-from pandas._libs cimport util
+from pandas._libs.util cimport is_array
 
 from pandas._libs.lib import is_scalar, maybe_convert_objects
 
 
 cpdef check_result_array(object obj, Py_ssize_t cnt):
 
-    if (util.is_array(obj) or
+    if (is_array(obj) or
             (isinstance(obj, list) and len(obj) == cnt) or
             getattr(obj, 'shape', None) == (cnt,)):
         raise ValueError('Must produce aggregated value')
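For reference, the guard in check_result_array above can be read as the plain-Python sketch below; isinstance(obj, np.ndarray) stands in for the cimported is_array, and the calls at the end are illustrative inputs, not part of the commit.

import numpy as np

def check_result_array(obj, cnt):
    # Reject results that still look like one value per group member,
    # i.e. the function did not actually aggregate.
    if (isinstance(obj, np.ndarray)
            or (isinstance(obj, list) and len(obj) == cnt)
            or getattr(obj, "shape", None) == (cnt,)):
        raise ValueError("Must produce aggregated value")

check_result_array(3.5, cnt=4)               # scalar aggregate: passes silently
try:
    check_result_array([1, 2, 3, 4], cnt=4)  # one value per row: rejected
except ValueError as exc:
    print(exc)                               # Must produce aggregated value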
@@ -33,7 +29,7 @@ cdef class _BaseGrouper:
         if (dummy.dtype != self.arr.dtype
                 and values.dtype != self.arr.dtype):
             raise ValueError('Dummy array must be same dtype')
-        if util.is_array(values) and not values.flags.contiguous:
+        if is_array(values) and not values.flags.contiguous:
             # e.g. Categorical has no `flags` attribute
             values = values.copy()
         index = dummy.index.values
@@ -106,7 +102,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
         self.f = f
 
         values = series.values
-        if util.is_array(values) and not values.flags.c_contiguous:
+        if is_array(values) and not values.flags.c_contiguous:
             # e.g. Categorical has no `flags` attribute
             values = values.copy('C')
         self.arr = values
@@ -204,7 +200,7 @@ cdef class SeriesGrouper(_BaseGrouper):
         self.f = f
 
         values = series.values
-        if util.is_array(values) and not values.flags.c_contiguous:
+        if is_array(values) and not values.flags.c_contiguous:
             # e.g. Categorical has no `flags` attribute
             values = values.copy('C')
         self.arr = values
@@ -288,9 +284,9 @@ cpdef inline extract_result(object res, bint squeeze=True):
         res = res._values
         if squeeze and res.ndim == 1 and len(res) == 1:
             res = res[0]
-    if hasattr(res, 'values') and util.is_array(res.values):
+    if hasattr(res, 'values') and is_array(res.values):
         res = res.values
-    if util.is_array(res):
+    if is_array(res):
         if res.ndim == 0:
             res = res.item()
         elif squeeze and res.ndim == 1 and len(res) == 1:
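The extract_result hunk above only swaps util.is_array for the cimported is_array; a plain-Python reading of the surrounding logic (illustrative only, again using isinstance(..., np.ndarray) in place of is_array):

import numpy as np

def extract_result(res, squeeze=True):
    # Unwrap .values when it is an ndarray, then collapse trivial arrays.
    if hasattr(res, "values") and isinstance(res.values, np.ndarray):
        res = res.values
    if isinstance(res, np.ndarray):
        if res.ndim == 0:
            res = res.item()
        elif squeeze and res.ndim == 1 and len(res) == 1:
            res = res[0]
    return res

print(extract_result(np.array(7.0)))     # 7.0   (0-d array unwrapped)
print(extract_result(np.array([7.0])))   # 7.0   (length-1 array squeezed)
print(extract_result(np.array([1, 2])))  # [1 2] (left alone)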
@@ -304,7 +300,7 @@ cdef class Slider:
     """
     cdef:
         ndarray values, buf
-        Py_ssize_t stride, orig_len, orig_stride
+        Py_ssize_t stride, orig_len
         char *orig_data
 
     def __init__(self, ndarray values, ndarray buf):
@@ -320,7 +316,6 @@ cdef class Slider:
 
         self.orig_data = self.buf.data
         self.orig_len = self.buf.shape[0]
-        self.orig_stride = self.buf.strides[0]
 
         self.buf.data = self.values.data
         self.buf.strides[0] = self.stride
@@ -333,10 +328,8 @@ cdef class Slider:
         self.buf.shape[0] = end - start
 
     cdef reset(self):
-
-        self.buf.shape[0] = self.orig_len
         self.buf.data = self.orig_data
-        self.buf.strides[0] = self.orig_stride
+        self.buf.shape[0] = self.orig_len
 
 
 class InvalidApply(Exception):
@@ -346,7 +339,6 @@ class InvalidApply(Exception):
 def apply_frame_axis0(object frame, object f, object names,
                       const int64_t[:] starts, const int64_t[:] ends):
     cdef:
-        BlockSlider slider
         Py_ssize_t i, n = len(starts)
         list results
         object piece
@@ -357,124 +349,42 @@ def apply_frame_axis0(object frame, object f, object names,
 
     results = []
 
-    slider = BlockSlider(frame)
-
     mutated = False
-    item_cache = slider.dummy._item_cache
-    try:
-        for i in range(n):
-            slider.move(starts[i], ends[i])
+    item_cache = frame._item_cache
 
-            item_cache.clear()  # ugh
-            chunk = slider.dummy
-            object.__setattr__(chunk, 'name', names[i])
+    for i in range(n):
+        item_cache.clear()  # ugh
+        chunk = frame[starts[i]:ends[i]]
+        object.__setattr__(chunk, 'name', names[i])
 
-            try:
-                piece = f(chunk)
-            except Exception:
-                # We can't be more specific without knowing something about `f`
-                raise InvalidApply('Let this error raise above us')
+        try:
+            piece = f(chunk)
+        except Exception:
+            # We can't be more specific without knowing something about `f`
+            raise InvalidApply('Let this error raise above us')
 
-            # Need to infer if low level index slider will cause segfaults
-            require_slow_apply = i == 0 and piece is chunk
+        # Need to infer if low level index slider will cause segfaults
+        require_slow_apply = i == 0 and piece is chunk
+        try:
+            if not piece.index is chunk.index:
+                mutated = True
+        except AttributeError:
+            # `piece` might not have an index, could be e.g. an int
+            pass
+
+        if not is_scalar(piece):
+            # Need to copy data to avoid appending references
             try:
-                if not piece.index is chunk.index:
-                    mutated = True
-            except AttributeError:
-                # `piece` might not have an index, could be e.g. an int
-                pass
-
-            if not is_scalar(piece):
-                # Need to copy data to avoid appending references
-                try:
-                    piece = piece.copy(deep="all")
-                except (TypeError, AttributeError):
-                    piece = copy(piece)
-
-            results.append(piece)
-
-            # If the data was modified inplace we need to
-            # take the slow path to not risk segfaults
-            # we have already computed the first piece
-            if require_slow_apply:
-                break
-    finally:
-        slider.reset()
+                piece = piece.copy(deep="all")
+            except (TypeError, AttributeError):
+                piece = copy(piece)
 
-    return results, mutated
+        results.append(piece)
 
+        # If the data was modified inplace we need to
+        # take the slow path to not risk segfaults
+        # we have already computed the first piece
+        if require_slow_apply:
+            break
 
-cdef class BlockSlider:
-    """
-    Only capable of sliding on axis=0
-    """
-
-    cdef public:
-        object frame, dummy, index
-        int nblocks
-        Slider idx_slider
-        list blocks
-
-    cdef:
-        char **base_ptrs
-
-    def __init__(self, object frame):
-        cdef:
-            Py_ssize_t i
-            object b
-
-        self.frame = frame
-        self.dummy = frame[:0]
-        self.index = self.dummy.index
-
-        self.blocks = [b.values for b in self.dummy._mgr.blocks]
-
-        for x in self.blocks:
-            util.set_array_not_contiguous(x)
-
-        self.nblocks = len(self.blocks)
-        # See the comment in indexes/base.py about _index_data.
-        # We need this for EA-backed indexes that have a reference to a 1-d
-        # ndarray like datetime / timedelta / period.
-        self.idx_slider = Slider(
-            self.frame.index._index_data, self.dummy.index._index_data)
-
-        self.base_ptrs = <char**>malloc(sizeof(char*) * len(self.blocks))
-        for i, block in enumerate(self.blocks):
-            self.base_ptrs[i] = (<ndarray>block).data
-
-    def __dealloc__(self):
-        free(self.base_ptrs)
-
-    cdef move(self, int start, int end):
-        cdef:
-            ndarray arr
-            Py_ssize_t i
-
-        # move blocks
-        for i in range(self.nblocks):
-            arr = self.blocks[i]
-
-            # axis=1 is the frame's axis=0
-            arr.data = self.base_ptrs[i] + arr.strides[1] * start
-            arr.shape[1] = end - start
-
-        # move and set the index
-        self.idx_slider.move(start, end)
-
-        object.__setattr__(self.index, '_index_data', self.idx_slider.buf)
-        self.index._engine.clear_mapping()
-        self.index._cache.clear()  # e.g. inferred_freq must go
-
-    cdef reset(self):
-        cdef:
-            ndarray arr
-            Py_ssize_t i
-
-        # reset blocks
-        for i in range(self.nblocks):
-            arr = self.blocks[i]
-
-            # axis=1 is the frame's axis=0
-            arr.data = self.base_ptrs[i]
-            arr.shape[1] = 0
+    return results, mutated
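The net effect of the apply_frame_axis0 change is that each per-group chunk is now produced by an ordinary positional slice of the frame instead of a BlockSlider that repointed the blocks' data buffers in place; since slicing returns a fresh object per group, there is no shared buffer to restore, which is why the try/finally wrapper ending in slider.reset() is gone. A minimal plain-Python sketch of the new loop shape (the DataFrame, the starts/ends boundaries, and func below are made-up inputs, not part of the commit):

import numpy as np
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b", "b", "b"], "val": range(5)})
starts = np.array([0, 2])   # first positional row of each group
ends = np.array([2, 5])     # one past the last row of each group

def func(chunk):
    return chunk["val"].sum()

results = []
for i in range(len(starts)):
    chunk = df[starts[i]:ends[i]]   # positional slice, as in frame[starts[i]:ends[i]]
    results.append(func(chunk))

print(results)   # [1, 9]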
