Skip to content

Fix #12373: rolling functions raise ValueError on float32 data #12376

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1200,3 +1200,5 @@ Bug Fixes
- Bug when initializing categorical series with a scalar value. (:issue:`12336`)
- Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`)
- Bug when increasing the buffer size of CSV reader in ``read_csv`` (:issue:`12494`)

- Bug in ``.rolling`` in which rolling functions applied to ``float32`` data raised a ``ValueError`` (:issue:`12373`)
20 changes: 14 additions & 6 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,16 +149,17 @@ def _prep_values(self, values=None, kill_inf=True, how=None):
if values is None:
values = getattr(self._selected_obj, 'values', self._selected_obj)

# coerce dtypes as appropriate
# GH #12373 : rolling functions error on float32 data
# make sure the data is coerced to float64
if com.is_float_dtype(values.dtype):
pass
values = com._ensure_float64(values)
elif com.is_integer_dtype(values.dtype):
values = values.astype(float)
values = com._ensure_float64(values)
elif com.is_timedelta64_dtype(values.dtype):
values = values.view('i8').astype(float)
values = com._ensure_float64(values.view('i8'))
else:
try:
values = values.astype(float)
values = com._ensure_float64(values)
except (ValueError, TypeError):
raise TypeError("cannot handle this type -> {0}"
"".format(values.dtype))
Expand Down Expand Up @@ -457,7 +458,9 @@ def _apply(self, func, window=None, center=None, check_minp=None, how=None,

def func(arg, window, min_periods=None):
minp = check_minp(min_periods, window)
return cfunc(arg, window, minp, **kwargs)
# GH #12373: rolling functions error on float32 data
return cfunc(com._ensure_float64(arg),
window, minp, **kwargs)

# calculation function
if center:
Expand Down Expand Up @@ -494,6 +497,7 @@ def count(self):
obj = self._convert_freq()
window = self._get_window()
window = min(window, len(obj)) if not self.center else window

try:
converted = np.isfinite(obj).astype(float)
except TypeError:
Expand Down Expand Up @@ -657,6 +661,10 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
window = self._get_window(other)

def _get_cov(X, Y):
# GH #12373 : rolling functions error on float32 data
# to avoid potential overflow, cast the data to float64
X = X.astype('float64')
Y = Y.astype('float64')
mean = lambda x: x.rolling(window, self.min_periods,
center=self.center).mean(**kwargs)
count = (X + Y).rolling(window=window,
Expand Down
187 changes: 187 additions & 0 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,193 @@ def test_deprecations(self):
mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0)


# GH #12373 : rolling functions error on float32 data
# make sure rolling functions work for different dtypes
class TestDtype(Base):
    """Compare rolling aggregations on ``dtype`` data with float64 results.

    Subclasses override ``dtype``.  ``setUp`` builds two Series and one
    DataFrame with that dtype, and ``test_dtypes`` applies every entry of
    ``funcs`` to a rolling object of size ``window`` and compares the
    outcome with the float64 expectations from ``get_expects``.
    """
    dtype = None
    window = 2

    # aggregation name -> callable applied to a rolling object
    funcs = {
        'count': lambda roll: roll.count(),
        'max': lambda roll: roll.max(),
        'min': lambda roll: roll.min(),
        'sum': lambda roll: roll.sum(),
        'mean': lambda roll: roll.mean(),
        'std': lambda roll: roll.std(),
        'var': lambda roll: roll.var(),
        'median': lambda roll: roll.median()
    }

    def get_expects(self):
        """Return expected results keyed by data name, then function name."""
        nan = np.nan

        def series(values):
            # every expected Series is float64, whatever the input dtype
            return Series(values, dtype='float64')

        def frame(col0, col1):
            # two-column float64 frame with integer column labels 0 and 1
            return DataFrame({0: Series(col0), 1: Series(col1)},
                             dtype='float64')

        sr1_expected = {
            'count': series([1, 2, 2, 2, 2]),
            'max': series([nan, 1, 2, 3, 4]),
            'min': series([nan, 0, 1, 2, 3]),
            'sum': series([nan, 1, 3, 5, 7]),
            'mean': series([nan, .5, 1.5, 2.5, 3.5]),
            'std': series([nan] + [np.sqrt(.5)] * 4),
            'var': series([nan, .5, .5, .5, .5]),
            'median': series([nan, .5, 1.5, 2.5, 3.5])
        }
        sr2_expected = {
            'count': series([1, 2, 2, 2, 2]),
            'max': series([nan, 10, 8, 6, 4]),
            'min': series([nan, 8, 6, 4, 2]),
            'sum': series([nan, 18, 14, 10, 6]),
            'mean': series([nan, 9, 7, 5, 3]),
            'std': series([nan] + [np.sqrt(2)] * 4),
            'var': series([nan, 2, 2, 2, 2]),
            'median': series([nan, 9, 7, 5, 3])
        }
        df_expected = {
            'count': frame([1, 2, 2, 2, 2], [1, 2, 2, 2, 2]),
            'max': frame([nan, 2, 4, 6, 8], [nan, 3, 5, 7, 9]),
            'min': frame([nan, 0, 2, 4, 6], [nan, 1, 3, 5, 7]),
            'sum': frame([nan, 2, 6, 10, 14], [nan, 4, 8, 12, 16]),
            'mean': frame([nan, 1, 3, 5, 7], [nan, 2, 4, 6, 8]),
            'std': frame([nan] + [np.sqrt(2)] * 4,
                         [nan] + [np.sqrt(2)] * 4),
            'var': frame([nan, 2, 2, 2, 2], [nan, 2, 2, 2, 2]),
            'median': frame([nan, 1, 3, 5, 7], [nan, 2, 4, 6, 8]),
        }

        return {'sr1': sr1_expected, 'sr2': sr2_expected, 'df': df_expected}

    def _create_dtype_data(self, dtype):
        """Build the input objects (two Series, one DataFrame) as ``dtype``."""
        return {
            'sr1': Series(range(5), dtype=dtype),
            'sr2': Series(range(10, 0, -2), dtype=dtype),
            'df': DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype)
        }

    def _create_data(self):
        """Run the parent's data setup, then add dtype inputs and expectations."""
        super(TestDtype, self)._create_data()
        self.data = self._create_dtype_data(self.dtype)
        self.expects = self.get_expects()

    def setUp(self):
        self._create_data()

    def test_dtypes(self):
        """Apply each aggregation to each input and compare with expectations."""
        for f_name in self.funcs.keys():
            for d_name in self.data.keys():
                aggregate = self.funcs[f_name]
                obj = self.data[d_name]

                # pick the comparison helper matching the container type
                if isinstance(obj, Series):
                    check = assert_series_equal
                else:
                    check = assert_frame_equal
                expected = self.expects[d_name][f_name]

                rolling = obj.rolling(window=self.window)
                actual = aggregate(rolling)

                check(actual, expected)


class TestDtype_object(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=object``."""
    dtype = object


class TestDtype_int8(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.int8``."""
    dtype = np.int8


class TestDtype_int16(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.int16``."""
    dtype = np.int16


class TestDtype_int32(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.int32``."""
    dtype = np.int32


class TestDtype_int64(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.int64``."""
    dtype = np.int64


class TestDtype_uint8(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.uint8``."""
    dtype = np.uint8


class TestDtype_uint16(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.uint16``."""
    dtype = np.uint16


class TestDtype_uint32(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.uint32``."""
    dtype = np.uint32


class TestDtype_uint64(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.uint64``."""
    dtype = np.uint64


class TestDtype_float16(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.float16``."""
    dtype = np.float16


class TestDtype_float32(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.float32``.

    This is the dtype reported in GH #12373.
    """
    dtype = np.float32


class TestDtype_float64(TestDtype):
    """Run the inherited TestDtype rolling checks with ``dtype=np.float64``."""
    dtype = np.float64


class TestDtype_category(TestDtype):
    """Run the inherited rolling checks on category-dtype Series only.

    ``_create_dtype_data`` is overridden so that no DataFrame input is built.
    """
    dtype = 'category'
    # NOTE(review): not read by any code visible here -- presumably a marker
    # that the DataFrame case is left out; confirm before relying on it.
    include_df = False

    def _create_dtype_data(self, dtype):
        """Build only the two Series inputs, created with ``dtype``."""
        first = Series(range(5), dtype=dtype)
        second = Series(range(10, 0, -2), dtype=dtype)
        return {'sr1': first, 'sr2': second}


class TestDatetimeLikeDtype(TestDtype):
    """Datetime-like variant of the dtype checks (``M8[ns]`` by default).

    ``setUp`` raises ``nose.SkipTest`` unconditionally, so the test below is
    skipped rather than executed.
    """
    dtype = np.dtype('M8[ns]')

    # GH #12373: rolling functions raise ValueError on float32 data
    def setUp(self):
        reason = "Skip rolling on DatetimeLike dtypes."
        raise nose.SkipTest(reason)

    def test_dtypes(self):
        """Expect the inherited checks to raise ``TypeError``."""
        inherited = super(TestDatetimeLikeDtype, self).test_dtypes
        with tm.assertRaises(TypeError):
            inherited()


class TestDtype_timedelta(TestDatetimeLikeDtype):
    """TestDatetimeLikeDtype variant with ``dtype`` set to ``m8[ns]``."""
    dtype = np.dtype('m8[ns]')


class TestDtype_datetime64UTC(TestDatetimeLikeDtype):
    """TestDatetimeLikeDtype variant with ``dtype='datetime64[ns, UTC]'``."""
    dtype = 'datetime64[ns, UTC]'


class TestMoments(Base):

def setUp(self):
Expand Down