Skip to content

Commit 3422fcc

Browse files
parthi-sivarhshadrach
authored and committed
BUG: Fix np.inf + np.nan sum issue on groupby mean (pandas-dev#52964)
* BUG: Fix np.inf + np.nan sum issue on groupby mean * BUG: Change variable name * TST: add test case to validate the fix * Bug: Set Compensation to 0 when it is NaN * TST: Fix failing test * Remove Space * Add Comments * TST: assign expected to separate variable * Update comment * TST: Fix issue with Linux32 dtype ValueError --------- Co-authored-by: Richard Shadrach <[email protected]>
1 parent a643add commit 3422fcc

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

pandas/_libs/groupby.pyx

+7
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,13 @@ def group_mean(
10751075
y = val - compensation[lab, j]
10761076
t = sumx[lab, j] + y
10771077
compensation[lab, j] = t - sumx[lab, j] - y
1078+
if compensation[lab, j] != compensation[lab, j]:
1079+
# GH#50367
1080+
# If val is +/- infinity, compensation is NaN
1081+
# which would lead to results being NaN instead
1082+
# of +/-infinity. We cannot use util.is_nan
1083+
# because this code runs without the GIL (nogil)
1084+
compensation[lab, j] = 0.
10781085
sumx[lab, j] = t
10791086

10801087
for i in range(ncounts):

pandas/tests/groupby/test_libgroupby.py

+20
Original file line numberDiff line numberDiff line change
@@ -282,3 +282,23 @@ def test_cython_group_mean_not_datetimelike_but_has_NaT_values():
282282
tm.assert_numpy_array_equal(
283283
actual[:, 0], np.array(np.divide(np.add(data[0], data[1]), 2), dtype="float64")
284284
)
285+
286+
287+
def test_cython_group_mean_Inf_at_begining_and_end():
288+
# GH 50367
289+
actual = np.array([[np.nan, np.nan], [np.nan, np.nan]], dtype="float64")
290+
counts = np.array([0, 0], dtype="int64")
291+
data = np.array(
292+
[[np.inf, 1.0], [1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5, np.inf]],
293+
dtype="float64",
294+
)
295+
labels = np.array([0, 1, 0, 1, 0, 1], dtype=np.intp)
296+
297+
group_mean(actual, counts, data, labels, is_datetimelike=False)
298+
299+
expected = np.array([[np.inf, 3], [3, np.inf]], dtype="float64")
300+
301+
tm.assert_numpy_array_equal(
302+
actual,
303+
expected,
304+
)

0 commit comments

Comments
 (0)