BUG: mean overflows for integer dtypes (fixes #10155)

mortada · mortada · commit 4155bbbeb184 · 2015-05-20T10:24:04.000-07:00
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -254,9 +254,16 @@ def nansum(values, axis=None, skipna=True):
 @bottleneck_switch()
 def nanmean(values, axis=None, skipna=True):
     values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
-    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_max))
     count = _get_counts(mask, axis)
 
+    dtype_sum = dtype_max
+    if is_integer_dtype(dtype):
+        dtype_sum = np.float64
+    elif is_float_dtype(dtype):
+        dtype_sum = dtype
+        count = dtype.type(count)
+    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
+
     if axis is not None and getattr(the_sum, 'ndim', False):
         the_mean = the_sum / count
         ct_mask = count == 0
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 
-from pandas.core.common import isnull
+from pandas.core.common import isnull, is_integer_dtype
 import pandas.core.nanops as nanops
 import pandas.util.testing as tm
 
@@ -323,6 +323,28 @@ def test_nanmean(self):
                         allow_complex=False, allow_obj=False,
                         allow_str=False, allow_date=False, allow_tdelta=True)
 
+    def test_nanmean_overflow(self):
+        # GH 10155
+        # In the previous implementation mean could overflow for int dtypes;
+        # it is now consistent with numpy.
+        from pandas import Series
+        for a in [2 ** 55, -2 ** 55, 20150515061816532]:
+            s = Series(a, index=range(10), dtype=np.int64)
+            result = s.mean()
+            np_result = s.values.mean()
+            self.assertEqual(result, a)
+            self.assertEqual(result, np_result)
+            self.assertTrue(result.dtype == np.float64)
+
+        # ints must come back as float64; floats must keep their own dtype
+        for dtype in [np.int16, np.int32, np.int64, np.float16, np.float32, np.float64]:
+            s = Series(range(10), dtype=dtype)
+            result = s.mean()
+            if is_integer_dtype(dtype):
+                self.assertTrue(result.dtype == np.float64)
+            else:
+                self.assertTrue(result.dtype == dtype)
+
     def test_nanmedian(self):
         self.check_funs(nanops.nanmedian, np.median,
                         allow_complex=False, allow_str=False, allow_date=False,