From 7472fe57be22acf31432441487b2b60ebb8d3d96 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 14:55:06 -0800 Subject: [PATCH 1/2] CLN: reachable cases in Reducer --- pandas/_libs/reduction.pyx | 48 ++++++++---------------- pandas/tests/groupby/test_bin_groupby.py | 15 ++++---- 2 files changed, 23 insertions(+), 40 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 11dc2d04bb74e..0eb2050f9ece0 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -81,12 +81,10 @@ cdef class Reducer: else: - # we passed a series-like - if hasattr(dummy, 'values'): - - typ = type(dummy) - index = getattr(dummy, 'index', None) - dummy = dummy.values + # we passed a Series + typ = type(dummy) + index = dummy.index + dummy = dummy.values if dummy.dtype != self.arr.dtype: raise ValueError('Dummy array must be same dtype') @@ -99,9 +97,8 @@ cdef class Reducer: cdef: char* dummy_buf ndarray arr, result, chunk - Py_ssize_t i, incr + Py_ssize_t i flatiter it - bint has_labels object res, name, labels, index object cached_typ = None @@ -110,9 +107,6 @@ cdef class Reducer: dummy_buf = chunk.data chunk.data = arr.data labels = self.labels - has_labels = labels is not None - has_index = self.index is not None - incr = self.increment result = np.empty(self.nresults, dtype='O') it = PyArray_IterNew(result) @@ -120,33 +114,19 @@ cdef class Reducer: try: for i in range(self.nresults): - if has_labels: - name = labels[i] - else: - name = None - # create the cached type # each time just reassign the data if i == 0: if self.typ is not None: - - # recreate with the index if supplied - if has_index: - - cached_typ = self.typ( - chunk, index=self.index, name=name) - - else: - - # use the passsed typ, sans index - cached_typ = self.typ(chunk, name=name) + # In this case, we also have self.index + name = labels[i] + cached_typ = self.typ(chunk, index=self.index, name=name) # use the cached_typ if possible if cached_typ is not None: - - if has_index: - object.__setattr__(cached_typ, 'index', self.index) + # In this case, we also have non-None labels + name = labels[i] object.__setattr__( cached_typ._data._block, 'values', chunk) @@ -611,18 +591,22 @@ cdef class BlockSlider: arr.shape[1] = 0 -def compute_reduction(arr, f, axis=0, dummy=None, labels=None): +def compute_reduction(arr: np.ndarray, f, axis: int = 0, dummy=None, labels=None): """ Parameters ----------- - arr : NDFrame object + arr : np.ndarray f : function axis : integer axis dummy : type of reduced output (series) labels : Index or None """ + # We either have both dummy and labels, or neither of them + if (labels is None) ^ (dummy is None): + raise ValueError("Must pass either dummy and labels, or neither") + if labels is not None: # Caller is responsible for ensuring we don't have MultiIndex assert labels.nlevels == 1 diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 4ede6b165c691..c95ee0f216358 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -116,15 +116,14 @@ class TestMoments: class TestReducer: def test_int_index(self): arr = np.random.randn(100, 4) - result = libreduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4))) - expected = arr.sum(0) - tm.assert_almost_equal(result, expected) - result = libreduction.compute_reduction( - arr, np.sum, axis=1, labels=Index(np.arange(100)) - ) - expected = arr.sum(1) - tm.assert_almost_equal(result, expected) + msg = "Must pass either dummy and labels, or neither" + # we must pass either both labels and dummy, or neither + with pytest.raises(ValueError, match=msg): + libreduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4))) + + with pytest.raises(ValueError, match=msg): + libreduction.compute_reduction(arr, np.sum, axis=1, labels=Index(np.arange(100))) dummy = Series(0.0, index=np.arange(100)) result = libreduction.compute_reduction( From 13497590566ec523119374e94aa0e8fc330da47a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 20:05:16 -0800 Subject: [PATCH 2/2] blackify --- pandas/tests/groupby/test_bin_groupby.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index c95ee0f216358..fcdf599e4ba33 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -123,7 +123,9 @@ def test_int_index(self): libreduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4))) with pytest.raises(ValueError, match=msg): - libreduction.compute_reduction(arr, np.sum, axis=1, labels=Index(np.arange(100))) + libreduction.compute_reduction( + arr, np.sum, axis=1, labels=Index(np.arange(100)) + ) dummy = Series(0.0, index=np.arange(100)) result = libreduction.compute_reduction(