From 4fb825f0860e0c1cebac638883c4d67c9fbf2188 Mon Sep 17 00:00:00 2001 From: Philippe Pepiot Date: Mon, 5 Jul 2021 18:50:31 +0200 Subject: [PATCH] BUG: fix regression with SeriesGrouper with Timestamp index (#42390) This fixes a regression introduced in c355ed1 where the cache is not initialized with the correct state of islider and vslider. The first call of {v,i}slider.move() must be done before initializing the cache. With a Timestamp index this triggers a "ValueError Length of values does not match length of index" Closes #42390 Signed-off-by: Philippe Pepiot --- pandas/_libs/reduction.pyx | 18 ++++++++++-------- pandas/tests/groupby/test_bin_groupby.py | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index d730084692dd4..9767af568ab3d 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -165,10 +165,6 @@ cdef class SeriesBinGrouper(_BaseGrouper): result = np.empty(self.ngroups, dtype='O') - cached_index, cached_series = self._init_dummy_series_and_index( - islider, vslider - ) - start = 0 try: for i in range(self.ngroups): @@ -178,6 +174,11 @@ cdef class SeriesBinGrouper(_BaseGrouper): islider.move(start, end) vslider.move(start, end) + if cached_index is None: + cached_index, cached_series = self._init_dummy_series_and_index( + islider, vslider + ) + self._update_cached_objs( cached_series, cached_index, islider, vslider) @@ -254,10 +255,6 @@ cdef class SeriesGrouper(_BaseGrouper): result = np.empty(self.ngroups, dtype='O') - cached_index, cached_series = self._init_dummy_series_and_index( - islider, vslider - ) - start = 0 try: for i in range(n): @@ -275,6 +272,11 @@ cdef class SeriesGrouper(_BaseGrouper): islider.move(start, end) vslider.move(start, end) + if cached_index is None: + cached_index, cached_series = self._init_dummy_series_and_index( + islider, vslider + ) + self._update_cached_objs( cached_series, cached_index, islider, vslider) diff --git 
a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 92e5e709a9b2e..68aa28687742b 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -27,6 +27,23 @@ def test_series_grouper(): tm.assert_almost_equal(counts, exp_counts) +def test_series_grouper_timestamp(): + # GH 42390 + obj = Series([1], index=[pd.Timestamp("2018-01-16 00:00:00+00:00")], dtype=np.intp) + labels = np.array([0], dtype=np.intp) + + def agg(series): + # this should not raise + if series.isna().values.all(): + return None + return np.sum(series) + + grouper = libreduction.SeriesGrouper(obj, agg, labels, 1) + result, counts = grouper.get_result() + tm.assert_numpy_array_equal(result, np.array([1], dtype=object)) + tm.assert_numpy_array_equal(counts, np.array([1], dtype=np.int64)) + + def test_series_grouper_result_length_difference(): # GH 40014 obj = Series(np.random.randn(10), dtype="float64")