REF: remove libreduction.SeriesBinGrouper (pandas-dev#43189)

jbrockmendel · web-flow · commit c2aaf8ee8a54 · 2021-08-31T19:40:13.000-04:00
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
@@ -93,105 +93,6 @@ cdef class _BaseGrouper:
         return res, initialized
 
 
-cdef class SeriesBinGrouper(_BaseGrouper):
-    """
-    Performs grouping operation according to bin edges, rather than labels
-    """
-    cdef:
-        Py_ssize_t nresults, ngroups
-
-    cdef public:
-        ndarray bins  # ndarray[int64_t]
-        ndarray arr, index, dummy_arr, dummy_index
-        object values, f, typ, ityp, name, idtype
-
-    def __init__(self, object series, object f, ndarray[int64_t] bins):
-
-        assert len(bins) > 0  # otherwise we get IndexError in get_result
-
-        self.bins = bins
-        self.f = f
-
-        values = series.values
-        if is_array(values) and not values.flags.c_contiguous:
-            # e.g. Categorical has no `flags` attribute
-            values = values.copy('C')
-        self.arr = values
-        self.typ = series._constructor
-        self.ityp = series.index._constructor
-        self.idtype = series.index.dtype
-        self.index = series.index.values
-        self.name = series.name
-
-        dummy = series.iloc[:0]
-        self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
-
-        # kludge for #1688
-        if len(bins) > 0 and bins[-1] == len(series):
-            self.ngroups = len(bins)
-        else:
-            # TODO: not reached except in test_series_bin_grouper directly
-            #  constructing SeriesBinGrouper; can we rule this case out?
-            self.ngroups = len(bins) + 1
-
-    def get_result(self):
-        cdef:
-            ndarray arr, result
-            ndarray[int64_t] counts
-            Py_ssize_t i, n, group_size, start, end
-            object res
-            bint initialized = 0
-            Slider vslider, islider
-            object cached_series = None, cached_index = None
-
-        counts = np.zeros(self.ngroups, dtype=np.int64)
-
-        if self.ngroups > 0:
-            counts[0] = self.bins[0]
-            for i in range(1, self.ngroups):
-                if i == self.ngroups - 1:
-                    counts[i] = len(self.arr) - self.bins[i - 1]
-                else:
-                    counts[i] = self.bins[i] - self.bins[i - 1]
-
-        group_size = 0
-        n = len(self.arr)
-
-        vslider = Slider(self.arr, self.dummy_arr)
-        islider = Slider(self.index, self.dummy_index)
-
-        result = np.empty(self.ngroups, dtype='O')
-
-        cached_index, cached_series = self._init_dummy_series_and_index(
-            islider, vslider
-        )
-
-        start = 0
-        try:
-            for i in range(self.ngroups):
-                group_size = counts[i]
-                end = start + group_size
-
-                islider.move(start, end)
-                vslider.move(start, end)
-
-                self._update_cached_objs(
-                    cached_series, cached_index, islider, vslider)
-
-                res, initialized = self._apply_to_group(cached_series, cached_index,
-                                                        initialized)
-                start += group_size
-
-                result[i] = res
-
-        finally:
-            # so we don't free the wrong memory
-            islider.reset()
-            vslider.reset()
-
-        return result, counts
-
-
 cdef class SeriesGrouper(_BaseGrouper):
     """
     Performs generic grouping operation while avoiding ndarray construction
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -932,6 +932,11 @@ def agg_series(
             # Preempt TypeError in _aggregate_series_fast
             result = self._aggregate_series_pure_python(obj, func)
 
+        elif isinstance(self, BinGrouper):
+            # Not yet able to remove BaseGrouper._aggregate_series_fast,
+            #  as test_crosstab.test_categorical breaks without it
+            result = self._aggregate_series_pure_python(obj, func)
+
         else:
             result = self._aggregate_series_fast(obj, func)
 
@@ -1149,15 +1154,9 @@ def groupings(self) -> list[grouper.Grouping]:
 
     def _aggregate_series_fast(self, obj: Series, func: F) -> np.ndarray:
         # -> np.ndarray[object]
-
-        # At this point we have already checked that
-        #  - obj.index is not a MultiIndex
-        #  - obj is backed by an ndarray, not ExtensionArray
-        #  - ngroups != 0
-        #  - len(self.bins) > 0
-        sbg = libreduction.SeriesBinGrouper(obj, func, self.bins)
-        result, _ = sbg.get_result()
-        return result
+        raise NotImplementedError(
+            "This should not be reached; use _aggregate_series_pure_python"
+        )
 
 
 def _is_indexed_like(obj, axes, axis: int) -> bool:
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
@@ -53,21 +53,6 @@ def test_series_grouper_requires_nonempty_raises():
         libreduction.SeriesGrouper(dummy, np.mean, labels, 2)
 
 
-def test_series_bin_grouper():
-    obj = Series(np.random.randn(10))
-
-    bins = np.array([3, 6], dtype=np.int64)
-
-    grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins)
-    result, counts = grouper.get_result()
-
-    expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()], dtype=object)
-    tm.assert_almost_equal(result, expected)
-
-    exp_counts = np.array([3, 3, 4], dtype=np.int64)
-    tm.assert_almost_equal(counts, exp_counts)
-
-
 def assert_block_lengths(x):
     assert len(x) == len(x._mgr.blocks[0].mgr_locs)
     return 0