pandas-dev · jreback · Aug 29, 2015 · Aug 29, 2015
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -725,6 +725,7 @@ Performance Improvements
 Bug Fixes
 ~~~~~~~~~
 
+- Bug in incorrect computation of ``.mean()`` on ``timedelta64[ns]`` because of overflow (:issue:`9442`)
 - Bug in ``DataFrame.to_html(index=False)`` renders unnecessary ``name`` row (:issue:`10344`)
 - Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`)
 - Bug in ``to_datetime`` with invalid dates and formats supplied (:issue:`10154`)

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -63,6 +63,7 @@ def __str__(self):
 _int8_max = np.iinfo(np.int8).max
 _int16_max = np.iinfo(np.int16).max
 _int32_max = np.iinfo(np.int32).max
+_int64_max = np.iinfo(np.int64).max
 
 # define abstract base classes to enable isinstance type checking on our
 # objects

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -21,7 +21,8 @@
                                 is_bool_dtype, is_object_dtype,
                                 is_datetime64_dtype, is_timedelta64_dtype,
                                 is_datetime_or_timedelta_dtype, _get_dtype,
-                                is_int_or_datetime_dtype, is_any_int_dtype)
+                                is_int_or_datetime_dtype, is_any_int_dtype,
+                                _int64_max)
 
 
 class disallow(object):
@@ -145,7 +146,7 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
         else:
             if fill_value_typ == '+inf':
                 # need the max int here
-                return np.iinfo(np.int64).max
+                return _int64_max
             else:
                 return tslib.iNaT
 
@@ -223,7 +224,12 @@ def _wrap_results(result, dtype):
             result = result.view(dtype)
     elif is_timedelta64_dtype(dtype):
         if not isinstance(result, np.ndarray):
-            result = lib.Timedelta(result)
+
+            # raise if we have a timedelta64[ns] which is too large
+            if np.fabs(result) > _int64_max:
+                raise ValueError("overflow in timedelta operation")
+
+            result = lib.Timedelta(result, unit='ns')
         else:
             result = result.astype('i8').view(dtype)
 
@@ -247,6 +253,8 @@ def nansum(values, axis=None, skipna=True):
     dtype_sum = dtype_max
     if is_float_dtype(dtype):
         dtype_sum = dtype
+    elif is_timedelta64_dtype(dtype):
+        dtype_sum = np.float64
     the_sum = values.sum(axis, dtype=dtype_sum)
     the_sum = _maybe_null_out(the_sum, axis, mask)
 
@@ -260,7 +268,7 @@ def nanmean(values, axis=None, skipna=True):
 
     dtype_sum = dtype_max
     dtype_count = np.float64
-    if is_integer_dtype(dtype):
+    if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype):
         dtype_sum = np.float64
     elif is_float_dtype(dtype):
         dtype_sum = dtype

diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py
@@ -686,6 +686,25 @@ def test_timedelta_ops(self):
         s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'), Timestamp('2015-02-15')])
         self.assertEqual(s.diff().median(), timedelta(days=6))
 
+    def test_overflow(self):
+        # GH 9442
+        s = Series(pd.date_range('20130101',periods=100000,freq='H'))
+        s[0] += pd.Timedelta('1s 1ms')
+
+        # mean
+        result = (s-s.min()).mean()
+        expected = pd.Timedelta((pd.DatetimeIndex((s-s.min())).asi8/len(s)).sum())
+
+        # the computation is converted to float so there might be some loss of precision
+        self.assertTrue(np.allclose(result.value/1000, expected.value/1000))
+
+        # sum
+        self.assertRaises(ValueError, lambda : (s-s.min()).sum())
+        s1 = s[0:10000]
+        self.assertRaises(ValueError, lambda : (s1-s1.min()).sum())
+        s2 = s[0:1000]
+        result = (s2-s2.min()).sum()
+
     def test_timedelta_ops_scalar(self):
         # GH 6808
         base = pd.to_datetime('20130101 09:01:12.123456')