diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index c62e60b7cdaa0..0a6ac9240ce99 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1075,6 +1075,13 @@ def group_mean(
                     y = val - compensation[lab, j]
                     t = sumx[lab, j] + y
                     compensation[lab, j] = t - sumx[lab, j] - y
+                    if compensation[lab, j] != compensation[lab, j]:
+                        # GH#50367
+                        # A value that is not equal to itself is NaN. When
+                        # val is +/-infinity the compensation term is NaN,
+                        # which would make the results NaN instead of
+                        # +/-infinity, so reset it to zero. util.is_nan
+                        # cannot be used here because of no gil.
+                        compensation[lab, j] = 0.
                     sumx[lab, j] = t
 
         for i in range(ncounts):
diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py
index 9552d67bfe992..d10bcf9053d1a 100644
--- a/pandas/tests/groupby/test_libgroupby.py
+++ b/pandas/tests/groupby/test_libgroupby.py
@@ -282,3 +282,23 @@ def test_cython_group_mean_not_datetimelike_but_has_NaT_values():
     tm.assert_numpy_array_equal(
         actual[:, 0], np.array(np.divide(np.add(data[0], data[1]), 2), dtype="float64")
     )
+
+
+def test_cython_group_mean_Inf_at_begining_and_end():
+    # GH 50367: inf in the first/last row must give an inf mean, not NaN
+    actual = np.array([[np.nan, np.nan], [np.nan, np.nan]], dtype="float64")
+    counts = np.array([0, 0], dtype="int64")
+    data = np.array(
+        [[np.inf, 1.0], [1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5, np.inf]],
+        dtype="float64",
+    )
+    labels = np.array([0, 1, 0, 1, 0, 1], dtype=np.intp)
+
+    group_mean(actual, counts, data, labels, is_datetimelike=False)
+
+    expected = np.array([[np.inf, 3], [3, np.inf]], dtype="float64")
+
+    tm.assert_numpy_array_equal(
+        actual,
+        expected,
+    )