Skip to content

Commit c2aaf8e

Browse files
authored
REF: remove libreduction.SeriesBinGrouper (pandas-dev#43189)
1 parent 2285eeb commit c2aaf8e

File tree

3 files changed

+8
-123
lines changed

3 files changed

+8
-123
lines changed

pandas/_libs/reduction.pyx

-99
Original file line number | Diff line number | Diff line change
@@ -93,105 +93,6 @@ cdef class _BaseGrouper:
93 93
return res, initialized
94 94

95 95

96-
cdef class SeriesBinGrouper(_BaseGrouper):
97-
"""
98-
Performs grouping operation according to bin edges, rather than labels
99-
"""
100-
cdef:
101-
Py_ssize_t nresults, ngroups
102-
103-
cdef public:
104-
ndarray bins # ndarray[int64_t]
105-
ndarray arr, index, dummy_arr, dummy_index
106-
object values, f, typ, ityp, name, idtype
107-
108-
def __init__(self, object series, object f, ndarray[int64_t] bins):
109-
110-
assert len(bins) > 0 # otherwise we get IndexError in get_result
111-
112-
self.bins = bins
113-
self.f = f
114-
115-
values = series.values
116-
if is_array(values) and not values.flags.c_contiguous:
117-
# e.g. Categorical has no `flags` attribute
118-
values = values.copy('C')
119-
self.arr = values
120-
self.typ = series._constructor
121-
self.ityp = series.index._constructor
122-
self.idtype = series.index.dtype
123-
self.index = series.index.values
124-
self.name = series.name
125-
126-
dummy = series.iloc[:0]
127-
self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
128-
129-
# kludge for #1688
130-
if len(bins) > 0 and bins[-1] == len(series):
131-
self.ngroups = len(bins)
132-
else:
133-
# TODO: not reached except in test_series_bin_grouper directly
134-
# constructing SeriesBinGrouper; can we rule this case out?
135-
self.ngroups = len(bins) + 1
136-
137-
def get_result(self):
138-
cdef:
139-
ndarray arr, result
140-
ndarray[int64_t] counts
141-
Py_ssize_t i, n, group_size, start, end
142-
object res
143-
bint initialized = 0
144-
Slider vslider, islider
145-
object cached_series = None, cached_index = None
146-
147-
counts = np.zeros(self.ngroups, dtype=np.int64)
148-
149-
if self.ngroups > 0:
150-
counts[0] = self.bins[0]
151-
for i in range(1, self.ngroups):
152-
if i == self.ngroups - 1:
153-
counts[i] = len(self.arr) - self.bins[i - 1]
154-
else:
155-
counts[i] = self.bins[i] - self.bins[i - 1]
156-
157-
group_size = 0
158-
n = len(self.arr)
159-
160-
vslider = Slider(self.arr, self.dummy_arr)
161-
islider = Slider(self.index, self.dummy_index)
162-
163-
result = np.empty(self.ngroups, dtype='O')
164-
165-
cached_index, cached_series = self._init_dummy_series_and_index(
166-
islider, vslider
167-
)
168-
169-
start = 0
170-
try:
171-
for i in range(self.ngroups):
172-
group_size = counts[i]
173-
end = start + group_size
174-
175-
islider.move(start, end)
176-
vslider.move(start, end)
177-
178-
self._update_cached_objs(
179-
cached_series, cached_index, islider, vslider)
180-
181-
res, initialized = self._apply_to_group(cached_series, cached_index,
182-
initialized)
183-
start += group_size
184-
185-
result[i] = res
186-
187-
finally:
188-
# so we don't free the wrong memory
189-
islider.reset()
190-
vslider.reset()
191-
192-
return result, counts
193-
194-
195 96
cdef class SeriesGrouper(_BaseGrouper):
196 97
"""
197 98
Performs generic grouping operation while avoiding ndarray construction

pandas/core/groupby/ops.py

+8 -9
Original file line number | Diff line number | Diff line change
@@ -932,6 +932,11 @@ def agg_series(
932 932
# Preempt TypeError in _aggregate_series_fast
933 933
result = self._aggregate_series_pure_python(obj, func)
934 934

935+
elif isinstance(self, BinGrouper):
936+
# Not yet able to remove the BaseGrouper aggregate_series_fast,
937+
# as test_crosstab.test_categorical breaks without it
938+
result = self._aggregate_series_pure_python(obj, func)
939+
935 940
else:
936 941
result = self._aggregate_series_fast(obj, func)
937 942

@@ -1149,15 +1154,9 @@ def groupings(self) -> list[grouper.Grouping]:
1149 1154

1150 1155
def _aggregate_series_fast(self, obj: Series, func: F) -> np.ndarray:
1151 1156
# -> np.ndarray[object]
1152-
1153-
# At this point we have already checked that
1154-
# - obj.index is not a MultiIndex
1155-
# - obj is backed by an ndarray, not ExtensionArray
1156-
# - ngroups != 0
1157-
# - len(self.bins) > 0
1158-
sbg = libreduction.SeriesBinGrouper(obj, func, self.bins)
1159-
result, _ = sbg.get_result()
1160-
return result
1157+
raise NotImplementedError(
1158+
"This should not be reached; use _aggregate_series_pure_python"
1159+
)
1161 1160

1162 1161

1163 1162
def _is_indexed_like(obj, axes, axis: int) -> bool:

pandas/tests/groupby/test_bin_groupby.py

-15
Original file line number | Diff line number | Diff line change
@@ -53,21 +53,6 @@ def test_series_grouper_requires_nonempty_raises():
53 53
libreduction.SeriesGrouper(dummy, np.mean, labels, 2)
54 54

55 55

56-
def test_series_bin_grouper():
57-
obj = Series(np.random.randn(10))
58-
59-
bins = np.array([3, 6], dtype=np.int64)
60-
61-
grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins)
62-
result, counts = grouper.get_result()
63-
64-
expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()], dtype=object)
65-
tm.assert_almost_equal(result, expected)
66-
67-
exp_counts = np.array([3, 3, 4], dtype=np.int64)
68-
tm.assert_almost_equal(counts, exp_counts)
69-
70-
71 56
def assert_block_lengths(x):
72 57
assert len(x) == len(x._mgr.blocks[0].mgr_locs)
73 58
return 0

0 commit comments

Comments
 (0)