pandas-dev · BranYang · Feb 18, 2016 · Mar 2, 2016 · Mar 3, 2016 · Mar 5, 2016
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
@@ -1200,3 +1200,5 @@ Bug Fixes
 - Bug when initializing categorical series with a scalar value. (:issue:`12336`)
 - Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`)
 - Bug when increasing the buffer size of CSV reader in ``read_csv`` (:issue:`12494`)
+
+- Bug in ``.rolling`` in which rolling functions applied to ``float32`` data would raise a ``ValueError`` (:issue:`12373`)
diff --git a/pandas/core/window.py b/pandas/core/window.py
@@ -149,16 +149,17 @@ def _prep_values(self, values=None, kill_inf=True, how=None):
         if values is None:
             values = getattr(self._selected_obj, 'values', self._selected_obj)
 
-        # coerce dtypes as appropriate
+        # GH #12373 : rolling functions error on float32 data
+        # make sure the data is coerced to float64
         if com.is_float_dtype(values.dtype):
-            pass
+            values = com._ensure_float64(values)
         elif com.is_integer_dtype(values.dtype):
-            values = values.astype(float)
+            values = com._ensure_float64(values)
         elif com.is_timedelta64_dtype(values.dtype):
-            values = values.view('i8').astype(float)
+            values = com._ensure_float64(values.view('i8'))
         else:
             try:
-                values = values.astype(float)
+                values = com._ensure_float64(values)
             except (ValueError, TypeError):
                 raise TypeError("cannot handle this type -> {0}"
                                 "".format(values.dtype))
@@ -457,7 +458,9 @@ def _apply(self, func, window=None, center=None, check_minp=None, how=None,
 
                 def func(arg, window, min_periods=None):
                     minp = check_minp(min_periods, window)
-                    return cfunc(arg, window, minp, **kwargs)
+                    # GH #12373: rolling functions error on float32 data
+                    return cfunc(com._ensure_float64(arg),
+                                 window, minp, **kwargs)
 
             # calculation function
             if center:
@@ -494,6 +497,7 @@ def count(self):
         obj = self._convert_freq()
         window = self._get_window()
         window = min(window, len(obj)) if not self.center else window
+
         try:
             converted = np.isfinite(obj).astype(float)
         except TypeError:
@@ -657,6 +661,10 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
         window = self._get_window(other)
 
         def _get_cov(X, Y):
+            # GH #12373 : rolling functions error on float32 data
+            # to avoid potential overflow, cast the data to float64
+            X = X.astype('float64')
+            Y = Y.astype('float64')
             mean = lambda x: x.rolling(window, self.min_periods,
                                        center=self.center).mean(**kwargs)
             count = (X + Y).rolling(window=window,

diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
@@ -16,6 +16,7 @@
                                  assert_frame_equal, assert_panel_equal,
                                  assert_index_equal)
 import pandas.core.datetools as datetools
+import pandas.core.common as com
 import pandas.stats.moments as mom
 import pandas.core.window as rwindow
 from pandas.core.base import SpecificationError
@@ -289,6 +290,209 @@ def test_deprecations(self):
             mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0)
 
 
+# GH #12373 : rolling functions error on float32 data
+# make sure rolling functions work for different dtypes
+class TestDtype(Base):
+    dtype = None
+    window = 2
+    # missing-value marker used in the expected results (timedelta uses tslib.iNaT)
+    naval = np.nan
+
+    # Function Name : (function, result_dtype, expectation_dtype)
+    funcs = {
+        'count': (lambda v: v.count(), 'float64', 'float64'),
+        'max': (lambda v: v.max(), 'float64', 'float64'),
+        'min': (lambda v: v.min(), 'float64', 'float64'),
+        'sum': (lambda v: v.sum(), 'float64', 'float64'),
+        'mean': (lambda v: v.mean(), 'float64', 'float64'),
+        'std': (lambda v: v.std(), 'float64', 'float64'),
+        'var': (lambda v: v.var(), 'float64', 'float64'),
+        'median': (lambda v: v.median(), 'float64', 'float64')
+    }
+
+    def get_expects(self):
+        expects = {
+            'sr1': {
+                'count': Series([1, 2, 2, 2, 2]),
+                'max': Series([self.naval, 1, 2, 3, 4]),
+                'min': Series([self.naval, 0, 1, 2, 3]),
+                'sum': Series([self.naval, 1, 3, 5, 7]),
+                'mean': Series([self.naval, .5, 1.5, 2.5, 3.5]),
+                'std': Series([self.naval] + [np.sqrt(.5)] * 4),
+                'var': Series([self.naval, .5, .5, .5, .5]),
+                'median': Series([self.naval, .5, 1.5, 2.5, 3.5])
+            },
+            'sr2': {
+                'count': Series([1, 2, 2, 2, 2]),
+                'max': Series([self.naval, 10, 8, 6, 4]),
+                'min': Series([self.naval, 8, 6, 4, 2]),
+                'sum': Series([self.naval, 18, 14, 10, 6]),
+                'mean': Series([self.naval, 9, 7, 5, 3]),
+                'std': Series([self.naval] + [np.sqrt(2)] * 4),
+                'var': Series([self.naval, 2, 2, 2, 2]),
+                'median': Series([self.naval, 9, 7, 5, 3])
+            },
+            'df': {
+                'count': DataFrame({0: Series([1, 2, 2, 2, 2]),
+                                    1: Series([1, 2, 2, 2, 2])}),
+                'max': DataFrame({0: Series([self.naval, 2, 4, 6, 8]),
+                                  1: Series([self.naval, 3, 5, 7, 9])}),
+                'min': DataFrame({0: Series([self.naval, 0, 2, 4, 6]),
+                                  1: Series([self.naval, 1, 3, 5, 7])}),
+                'sum': DataFrame({0: Series([self.naval, 2, 6, 10, 14]),
+                                  1: Series([self.naval, 4, 8, 12, 16])}),
+                'mean': DataFrame({0: Series([self.naval, 1, 3, 5, 7]),
+                                  1: Series([self.naval, 2, 4, 6, 8])}),
+                'std': DataFrame({0: Series([self.naval] + [np.sqrt(2)] * 4),
+                                  1: Series([self.naval] + [np.sqrt(2)] * 4)}),
+                'var': DataFrame({0: Series([self.naval, 2, 2, 2, 2]),
+                                  1: Series([self.naval, 2, 2, 2, 2])}),
+                'median': DataFrame({0: Series([self.naval, 1, 3, 5, 7]),
+                                     1: Series([self.naval, 2, 4, 6, 8])}),
+            }
+        }
+        return expects
+
+    def _create_dtype_data(self, dtype):
+        sr1 = Series(range(5), dtype=dtype)
+        sr2 = Series(range(10, 0, -2), dtype=dtype)
+        df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype)
+
+        data = {
+            'sr1': sr1,
+            'sr2': sr2,
+            'df': df
+        }
+
+        return data
+
+    def _create_data(self):
+        super(TestDtype, self)._create_data()
+        self.data = self._create_dtype_data(self.dtype)
+        self.expects = self.get_expects()
+
+    def setUp(self):
+        self._create_data()
+
+    def _cast_result(self, result, from_dtype, to_dtype):
+        if com.needs_i8_conversion(from_dtype):
+            if isinstance(result, Series):
+                result = result.view('i8')
+            elif isinstance(result, DataFrame):
+                final = []
+                for idx in result:
+                    final.append(Series(result[idx].view('i8')))
+                result = pd.concat(final, axis=1).reindex(
+                    columns=result.columns)
+        return result.astype(to_dtype)
+
+    def test_dtypes(self):
+        for f_name, d_name in product(self.funcs.keys(), self.data.keys()):
+            # Specify whether the results and expectations
+            # need to be coerced to a given dtype.
+            # Once we change the return value of roll_<function>,
+            # the coercion behavior here should change accordingly.
+            f, res_dtype, exp_dtype = self.funcs[f_name]
+            d = self.data[d_name]
+            assert_equal = assert_series_equal if isinstance(
+                d, Series) else assert_frame_equal
+            exp = self.expects[d_name][f_name]
+            if exp_dtype:
+                exp = exp.astype(com.pandas_dtype(exp_dtype))
+
+            roll = d.rolling(window=self.window)
+            result = f(roll)
+            if res_dtype:
+                result = self._cast_result(result,
+                                           self.dtype,
+                                           com.pandas_dtype(res_dtype))
+            assert_equal(result, exp)
+
+
+class TestDtype_object(TestDtype):
+    dtype = object
+
+
+class TestDtype_int8(TestDtype):
+    dtype = np.int8
+
+
+class TestDtype_int16(TestDtype):
+    dtype = np.int16
+
+
+class TestDtype_int32(TestDtype):
+    dtype = np.int32
+
+
+class TestDtype_int64(TestDtype):
+    dtype = np.int64
+
+
+class TestDtype_uint8(TestDtype):
+    dtype = np.uint8
+
+
+class TestDtype_uint16(TestDtype):
+    dtype = np.uint16
+
+
+class TestDtype_uint32(TestDtype):
+    dtype = np.uint32
+
+
+class TestDtype_uint64(TestDtype):
+    dtype = np.uint64
+
+
+class TestDtype_float16(TestDtype):
+    dtype = np.float16
+
+
+class TestDtype_float32(TestDtype):
+    dtype = np.float32
+
+
+class TestDtype_float64(TestDtype):
+    dtype = np.float64
+
+
+class TestDtype_category(TestDtype):
+    dtype = 'category'
+    include_df = False
+
+    def _create_dtype_data(self, dtype):
+        sr1 = Series(range(5), dtype=dtype)
+        sr2 = Series(range(10, 0, -2), dtype=dtype)
+
+        data = {
+            'sr1': sr1,
+            'sr2': sr2
+        }
+
+        return data
+
+
+class TestDatetimeLikeDtype(TestDtype):
+    dtype = np.dtype('M8[ns]')
+
+    # GH #12373: rolling on datetime-like dtypes is skipped for now
+    def setUp(self):
+        raise nose.SkipTest("Skip rolling on DatetimeLike dtypes.")
+
+    def test_dtypes(self):
+        with tm.assertRaises(TypeError):
+            super(TestDatetimeLikeDtype, self).test_dtypes()
+
+
+class TestDtype_timedelta(TestDatetimeLikeDtype):
+    dtype = np.dtype('m8[ns]')
+
+
+class TestDtype_datetime64UTC(TestDatetimeLikeDtype):
+    dtype = 'datetime64[ns, UTC]'
+
+
 class TestMoments(Base):
 
     def setUp(self):