Skip to content

Commit 4ac5a80

Browse files
committed
BUG: mean overflows for integer dtypes (fixes #10155)
1 parent 9b4d154 commit 4ac5a80

File tree

2 files changed

+31
-2
lines changed

2 files changed

+31
-2
lines changed

pandas/core/nanops.py

+8 -1
Original file line number | Diff line number | Diff line change
@@ -254,9 +254,16 @@ def nansum(values, axis=None, skipna=True):
254254
@bottleneck_switch()
255255
def nanmean(values, axis=None, skipna=True):
256256
values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
257-
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_max))
258257
count = _get_counts(mask, axis)
259258

259+
dtype_sum = dtype_max
260+
if is_integer_dtype(dtype):
261+
dtype_sum = np.float64
262+
elif is_float_dtype(dtype):
263+
dtype_sum = dtype
264+
count = dtype.type(count)
265+
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
266+
260267
if axis is not None and getattr(the_sum, 'ndim', False):
261268
the_mean = the_sum / count
262269
ct_mask = count == 0

pandas/tests/test_nanops.py

+23 -1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77

8-
from pandas.core.common import isnull
8+
from pandas.core.common import isnull, is_integer_dtype
99
import pandas.core.nanops as nanops
1010
import pandas.util.testing as tm
1111

@@ -323,6 +323,28 @@ def test_nanmean(self):
323323
allow_complex=False, allow_obj=False,
324324
allow_str=False, allow_date=False, allow_tdelta=True)
325325

326+
def test_nanmean_overflow(self):
327+
# GH 10155
328+
# In the previous implementation mean can overflow for int dtypes, it
329+
# is now consistent with numpy
330+
from pandas import Series
331+
for a in [2 ** 55, -2 ** 55, 20150515061816532]:
332+
s = Series(a, index=range(500), dtype=np.int64)
333+
result = s.mean()
334+
np_result = s.values.mean()
335+
self.assertEqual(result, a)
336+
self.assertEqual(result, np_result)
337+
self.assertTrue(result.dtype == np.float64)
338+
339+
# check returned dtype
340+
for dtype in [np.int16, np.int32, np.int64, np.float16, np.float32, np.float64]:
341+
s = Series(range(10), dtype=dtype)
342+
result = s.mean()
343+
if is_integer_dtype(dtype):
344+
self.assertTrue(result.dtype == np.float64)
345+
else:
346+
self.assertTrue(result.dtype == dtype)
347+
326348
def test_nanmedian(self):
327349
self.check_funs(nanops.nanmedian, np.median,
328350
allow_complex=False, allow_str=False, allow_date=False,

0 commit comments

Comments
 (0)