Skip to content

Commit 3d85fc7

Browse files
committed
BUG: mean overflows for integer dtypes (fixes #10155)
1 parent 0aceb38 commit 3d85fc7

File tree

3 files changed

+36
-2
lines changed

3 files changed

+36
-2
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ Bug Fixes
6363
- Bug in ``Categorical`` repr with ``display.width`` of ``None`` in Python 3 (:issue:`10087`)
6464

6565

66+
- Bug in ``mean()`` where integer dtypes can overflow (:issue:`10172`)
6667
- Bug where ``Panel.from_dict`` does not set dtype when specified (:issue:`10058`)
6768
- Bug where ``Timestamp``'s ``microsecond``, ``quarter``, ``dayofyear``, ``week`` and ``daysinmonth`` properties return ``np.int`` type, not built-in ``int``. (:issue:`10050`)
6869
- Bug where ``NaT`` raises ``AttributeError`` when accessing the ``daysinmonth`` and ``dayofweek`` properties. (:issue:`10096`)

pandas/core/nanops.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,16 @@ def nansum(values, axis=None, skipna=True):
254254
@bottleneck_switch()
255255
def nanmean(values, axis=None, skipna=True):
256256
values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
257-
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_max))
258257
count = _get_counts(mask, axis)
259258

259+
dtype_sum = dtype_max
260+
if is_integer_dtype(dtype):
261+
dtype_sum = np.float64
262+
elif is_float_dtype(dtype):
263+
dtype_sum = dtype
264+
count = dtype.type(count)
265+
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
266+
260267
if axis is not None and getattr(the_sum, 'ndim', False):
261268
the_mean = the_sum / count
262269
ct_mask = count == 0

pandas/tests/test_nanops.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77

8-
from pandas.core.common import isnull
8+
from pandas.core.common import isnull, is_integer_dtype
99
import pandas.core.nanops as nanops
1010
import pandas.util.testing as tm
1111

@@ -323,6 +323,32 @@ def test_nanmean(self):
323323
allow_complex=False, allow_obj=False,
324324
allow_str=False, allow_date=False, allow_tdelta=True)
325325

326+
def test_nanmean_overflow(self):
# Regression test: ``Series.mean()`` on int64 data must equal the input
# value and numpy's own mean, and must come back as float64.
327+
# GH 10155
328+
# In the previous implementation mean can overflow for int dtypes, it
329+
# is now consistent with numpy
330+
from pandas import Series
331+
332+
# numpy < 1.9.0 is not computing this correctly
333+
from distutils.version import LooseVersion
334+
if LooseVersion(np.__version__) >= '1.9.0':
# Each value is repeated 500 times: the exact sum falls outside the
# int64 range, while the mean is the value itself.
335+
for a in [2 ** 55, -2 ** 55, 20150515061816532]:
336+
s = Series(a, index=range(500), dtype=np.int64)
337+
result = s.mean()
338+
np_result = s.values.mean()
339+
self.assertEqual(result, a)
340+
self.assertEqual(result, np_result)
341+
self.assertTrue(result.dtype == np.float64)
342+
343+
# check returned dtype
# NOTE(review): indentation is lost in this view, so whether this loop is
# nested under the numpy version check above cannot be confirmed here.
344+
for dtype in [np.int16, np.int32, np.int64, np.float16, np.float32, np.float64]:
345+
s = Series(range(10), dtype=dtype)
346+
result = s.mean()
# integer input is expected to give a float64 mean; float input keeps
# its own dtype
347+
if is_integer_dtype(dtype):
348+
self.assertTrue(result.dtype == np.float64)
349+
else:
350+
self.assertTrue(result.dtype == dtype)
351+
326352
def test_nanmedian(self):
327353
self.check_funs(nanops.nanmedian, np.median,
328354
allow_complex=False, allow_str=False, allow_date=False,

0 commit comments

Comments
 (0)