
Commit e57f967

jbrockmendel authored and fangchenli committed
CLN: remove libreduction.Reducer (pandas-dev#35001)
1 parent cdd7262 commit e57f967

File tree: 2 files changed (+2 -208 lines)

pandas/_libs/reduction.pyx (+1 -173)
@@ -1,17 +1,12 @@
 from copy import copy

 from cython import Py_ssize_t
-from cpython.ref cimport Py_INCREF

 from libc.stdlib cimport malloc, free

 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray,
-                    int64_t,
-                    PyArray_SETITEM,
-                    PyArray_ITER_NEXT, PyArray_ITER_DATA, PyArray_IterNew,
-                    flatiter)
+from numpy cimport ndarray, int64_t
 cnp.import_array()

 from pandas._libs cimport util
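
Aside: the dropped cimports (Py_INCREF plus the PyArray_* iteration helpers and flatiter) were used only by the Reducer class removed below. As orientation, here is a rough pure-Python sketch of the fill pattern those C-API calls implement; the snippet is illustrative, not pandas code:

import numpy as np

# Sketch only: PyArray_IterNew/PyArray_ITER_NEXT walk a flat iterator over
# an object array and PyArray_SETITEM writes each slot; from Python,
# result.flat expresses the same thing.
result = np.empty(4, dtype=object)
for i in range(4):
    # C-API: PyArray_SETITEM(result, PyArray_ITER_DATA(it), value); PyArray_ITER_NEXT(it)
    result.flat[i] = i ** 2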
@@ -26,146 +21,6 @@ cdef _check_result_array(object obj, Py_ssize_t cnt):
         raise ValueError('Function does not reduce')


-cdef class Reducer:
-    """
-    Performs generic reduction operation on a C or Fortran-contiguous ndarray
-    while avoiding ndarray construction overhead
-    """
-    cdef:
-        Py_ssize_t increment, chunksize, nresults
-        object dummy, f, labels, typ, ityp, index
-        ndarray arr
-
-    def __init__(
-        self, ndarray arr, object f, int axis=1, object dummy=None, object labels=None
-    ):
-        cdef:
-            Py_ssize_t n, k
-
-        n, k = (<object>arr).shape
-
-        if axis == 0:
-            if not arr.flags.f_contiguous:
-                arr = arr.copy('F')
-
-            self.nresults = k
-            self.chunksize = n
-            self.increment = n * arr.dtype.itemsize
-        else:
-            if not arr.flags.c_contiguous:
-                arr = arr.copy('C')
-
-            self.nresults = n
-            self.chunksize = k
-            self.increment = k * arr.dtype.itemsize
-
-        self.f = f
-        self.arr = arr
-        self.labels = labels
-        self.dummy, self.typ, self.index, self.ityp = self._check_dummy(
-            dummy=dummy)
-
-    cdef _check_dummy(self, object dummy=None):
-        cdef:
-            object index = None, typ = None, ityp = None
-
-        if dummy is None:
-            dummy = np.empty(self.chunksize, dtype=self.arr.dtype)
-
-            # our ref is stolen later since we are creating this array
-            # in cython, so increment first
-            Py_INCREF(dummy)
-
-        else:
-
-            # we passed a Series
-            typ = type(dummy)
-            index = dummy.index
-            dummy = dummy.values
-
-            if dummy.dtype != self.arr.dtype:
-                raise ValueError('Dummy array must be same dtype')
-            if len(dummy) != self.chunksize:
-                raise ValueError(f'Dummy array must be length {self.chunksize}')
-
-        return dummy, typ, index, ityp
-
-    def get_result(self):
-        cdef:
-            char* dummy_buf
-            ndarray arr, result, chunk
-            Py_ssize_t i
-            flatiter it
-            object res, name, labels
-            object cached_typ = None
-
-        arr = self.arr
-        chunk = self.dummy
-        dummy_buf = chunk.data
-        chunk.data = arr.data
-        labels = self.labels
-
-        result = np.empty(self.nresults, dtype='O')
-        it = <flatiter>PyArray_IterNew(result)
-        reduction_success = True
-
-        try:
-            for i in range(self.nresults):
-
-                # create the cached type
-                # each time just reassign the data
-                if i == 0:
-
-                    if self.typ is not None:
-                        # In this case, we also have self.index
-                        name = labels[i]
-                        cached_typ = self.typ(
-                            chunk, index=self.index, name=name, dtype=arr.dtype)
-
-                # use the cached_typ if possible
-                if cached_typ is not None:
-                    # In this case, we also have non-None labels
-                    name = labels[i]
-
-                    object.__setattr__(
-                        cached_typ._mgr._block, 'values', chunk)
-                    object.__setattr__(cached_typ, 'name', name)
-                    res = self.f(cached_typ)
-                else:
-                    res = self.f(chunk)
-
-                # TODO: reason for not squeezing here?
-                extracted_res = _extract_result(res, squeeze=False)
-                if i == 0:
-                    # On the first pass, we check the output shape to see
-                    # if this looks like a reduction.
-                    # If it does not, return the computed value to be used by the
-                    # pure python implementation,
-                    # so the function won't be called twice on the same object,
-                    # and side effects would occur twice
-                    try:
-                        _check_result_array(extracted_res, len(self.dummy))
-                    except ValueError as err:
-                        if "Function does not reduce" not in str(err):
-                            # catch only the specific exception
-                            raise
-
-                        reduction_success = False
-                        PyArray_SETITEM(result, PyArray_ITER_DATA(it), copy(res))
-                        break
-
-                PyArray_SETITEM(result, PyArray_ITER_DATA(it), extracted_res)
-                chunk.data = chunk.data + self.increment
-                PyArray_ITER_NEXT(it)

-        finally:
-            # so we don't free the wrong memory
-            chunk.data = dummy_buf
-
-        result = maybe_convert_objects(result)
-        return result, reduction_success
-
-
 cdef class _BaseGrouper:
     cdef _check_dummy(self, object dummy):
         # both values and index must be an ndarray!
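
For context, Reducer.get_result() above amounted to the following pure-Python loop (axis=0 case). The names in this sketch are illustrative, not pandas API; the Cython version avoided the per-iteration Series construction by reusing one cached Series and sliding its underlying data pointer by `increment` bytes each step:

import numpy as np
import pandas as pd

def reduce_columns(arr, f, index=None, names=None):
    # Illustrative stand-in for the removed Reducer: apply f to each
    # column of a 2-D ndarray and collect the results in an object array.
    out = np.empty(arr.shape[1], dtype=object)
    for i in range(arr.shape[1]):
        chunk = arr[:, i]  # a view, not a copy
        if index is not None:
            # mirrors the "dummy" Series path; labels[i] supplied the name
            chunk = pd.Series(chunk, index=index,
                              name=None if names is None else names[i])
        out[i] = f(chunk)
    return out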
@@ -610,30 +465,3 @@ cdef class BlockSlider:
             # axis=1 is the frame's axis=0
             arr.data = self.base_ptrs[i]
             arr.shape[1] = 0
-
-
-def compute_reduction(arr: ndarray, f, axis: int = 0, dummy=None, labels=None):
-    """
-
-    Parameters
-    -----------
-    arr : np.ndarray
-    f : function
-    axis : integer axis
-    dummy : type of reduced output (series)
-    labels : Index or None
-    """
-
-    # We either have both dummy and labels, or neither of them
-    if (labels is None) ^ (dummy is None):
-        raise ValueError("Must pass either dummy and labels, or neither")
-
-    if labels is not None:
-        # Caller is responsible for ensuring we don't have MultiIndex
-        assert labels.nlevels == 1
-
-        # pass as an ndarray/ExtensionArray
-        labels = labels._values
-
-    reducer = Reducer(arr, f, axis=axis, dummy=dummy, labels=labels)
-    return reducer.get_result()
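
For reference, the removed entry point was called like this (adapted from the test deleted below; the snippet only runs on pandas builds from before this commit, since compute_reduction no longer exists afterwards):

import numpy as np
from pandas import Index, Series
from pandas._libs import reduction as libreduction

# Only valid before this commit: compute_reduction is removed afterwards.
arr = np.random.randn(100, 4)
dummy = Series(0.0, index=np.arange(100))
result, success = libreduction.compute_reduction(
    arr, np.sum, dummy=dummy, labels=Index(np.arange(4))
)
assert success
assert np.allclose(result, arr.sum(0))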

pandas/tests/groupby/test_bin_groupby.py (+1 -35)
@@ -6,7 +6,7 @@
 from pandas.core.dtypes.common import ensure_int64

 import pandas as pd
-from pandas import Index, Series, isna
+from pandas import Series, isna
 import pandas._testing as tm

@@ -136,37 +136,3 @@ def _ohlc(group):

 class TestMoments:
     pass
-
-
-class TestReducer:
-    def test_int_index(self):
-        arr = np.random.randn(100, 4)
-
-        msg = "Must pass either dummy and labels, or neither"
-        # we must pass either both labels and dummy, or neither
-        with pytest.raises(ValueError, match=msg):
-            libreduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4)))
-
-        with pytest.raises(ValueError, match=msg):
-            libreduction.compute_reduction(
-                arr, np.sum, axis=1, labels=Index(np.arange(100))
-            )
-
-        dummy = Series(0.0, index=np.arange(100))
-        result, _ = libreduction.compute_reduction(
-            arr, np.sum, dummy=dummy, labels=Index(np.arange(4))
-        )
-        expected = arr.sum(0)
-        tm.assert_almost_equal(result, expected)
-
-        dummy = Series(0.0, index=np.arange(4))
-        result, _ = libreduction.compute_reduction(
-            arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100))
-        )
-        expected = arr.sum(1)
-        tm.assert_almost_equal(result, expected)
-
-        result, _ = libreduction.compute_reduction(
-            arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100))
-        )
-        tm.assert_almost_equal(result, expected)
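
For reference, the reductions exercised by the deleted TestReducer can still be checked through the public API; a rough equivalent sketch (not part of this commit):

import numpy as np
import pandas as pd
import pandas._testing as tm

arr = np.random.randn(100, 4)
df = pd.DataFrame(arr)

# Same reductions the deleted test verified, via DataFrame.apply.
tm.assert_almost_equal(df.apply(np.sum, axis=0).to_numpy(), arr.sum(0))
tm.assert_almost_equal(df.apply(np.sum, axis=1).to_numpy(), arr.sum(1))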
