-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Fix #12373: rolling functions raise ValueError on float32 data #12376
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
73f8823
9a17990
d20adfa
d26a680
9241d76
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
assert_frame_equal, assert_panel_equal, | ||
assert_index_equal) | ||
import pandas.core.datetools as datetools | ||
import pandas.core.common as com | ||
import pandas.stats.moments as mom | ||
import pandas.core.window as rwindow | ||
from pandas.core.base import SpecificationError | ||
|
@@ -289,6 +290,209 @@ def test_deprecations(self): | |
mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0) | ||
|
||
|
||
# GH #12373 : rolling functions error on float32 data | ||
# make sure rolling functions work for different dtypes | ||
class TestDtype(Base):
    """Run every rolling aggregation in ``funcs`` over one input dtype.

    GH #12373: rolling functions error on float32 data.  Subclasses only
    set ``dtype``; the fixtures are rebuilt for that dtype in ``setUp`` and
    ``test_dtypes`` compares each rolling result with the hand-written
    expectation returned by ``get_expects``.
    """

    dtype = None
    window = 2
    # the nan value, timedelta uses tslib.iNaT
    naval = np.nan

    # Function Name : (function, result_dtype, expectation_dtype)
    #   result_dtype      -- dtype the rolling result must already have;
    #                        it is checked, never forced with a cast
    #   expectation_dtype -- dtype the hand-written expectation is
    #                        converted to before the comparison
    # A falsy dtype entry disables the respective step.
    funcs = {
        'count': (lambda v: v.count(), 'float64', 'float64'),
        'max': (lambda v: v.max(), 'float64', 'float64'),
        'min': (lambda v: v.min(), 'float64', 'float64'),
        'sum': (lambda v: v.sum(), 'float64', 'float64'),
        'mean': (lambda v: v.mean(), 'float64', 'float64'),
        'std': (lambda v: v.std(), 'float64', 'float64'),
        'var': (lambda v: v.var(), 'float64', 'float64'),
        'median': (lambda v: v.median(), 'float64', 'float64')
    }

    def get_expects(self):
        """Return the expected values as ``{fixture name: {function: obj}}``.

        Every expectation except ``'count'`` starts with ``self.naval``.
        """
        na = self.naval

        def counts():
            return Series([1, 2, 2, 2, 2])

        def lead_na(*values):
            # one NA marker followed by the given values
            return Series([na] + list(values))

        def frame(col0, col1):
            return DataFrame({0: col0, 1: col1})

        root_half = [np.sqrt(.5)] * 4
        root_two = [np.sqrt(2)] * 4

        expects = {
            'sr1': {
                'count': counts(),
                'max': lead_na(1, 2, 3, 4),
                'min': lead_na(0, 1, 2, 3),
                'sum': lead_na(1, 3, 5, 7),
                'mean': lead_na(.5, 1.5, 2.5, 3.5),
                'std': lead_na(*root_half),
                'var': lead_na(.5, .5, .5, .5),
                'median': lead_na(.5, 1.5, 2.5, 3.5)
            },
            'sr2': {
                'count': counts(),
                'max': lead_na(10, 8, 6, 4),
                'min': lead_na(8, 6, 4, 2),
                'sum': lead_na(18, 14, 10, 6),
                'mean': lead_na(9, 7, 5, 3),
                'std': lead_na(*root_two),
                'var': lead_na(2, 2, 2, 2),
                'median': lead_na(9, 7, 5, 3)
            },
            'df': {
                'count': frame(counts(), counts()),
                'max': frame(lead_na(2, 4, 6, 8),
                             lead_na(3, 5, 7, 9)),
                'min': frame(lead_na(0, 2, 4, 6),
                             lead_na(1, 3, 5, 7)),
                'sum': frame(lead_na(2, 6, 10, 14),
                             lead_na(4, 8, 12, 16)),
                'mean': frame(lead_na(1, 3, 5, 7),
                              lead_na(2, 4, 6, 8)),
                'std': frame(lead_na(*root_two),
                             lead_na(*root_two)),
                'var': frame(lead_na(2, 2, 2, 2),
                             lead_na(2, 2, 2, 2)),
                'median': frame(lead_na(1, 3, 5, 7),
                                lead_na(2, 4, 6, 8)),
            }
        }
        return expects

    def _create_dtype_data(self, dtype):
        """Build the input fixtures (two Series and one DataFrame) as *dtype*.

        Returns a dict keyed ``'sr1'``, ``'sr2'`` and ``'df'``; the keys must
        match the top-level keys of ``get_expects``.
        """
        sr1 = Series(range(5), dtype=dtype)
        sr2 = Series(range(10, 0, -2), dtype=dtype)
        df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype)

        data = {
            'sr1': sr1,
            'sr2': sr2,
            'df': df
        }

        return data

    def _create_data(self):
        """Run the base-class setup, then add the dtype-specific fixtures."""
        super(TestDtype, self)._create_data()
        self.data = self._create_dtype_data(self.dtype)
        self.expects = self.get_expects()

    def setUp(self):
        self._create_data()

    def test_dtypes(self):
        """Compare each rolling result with its expectation, dtype included.

        The result is deliberately NOT cast before the comparison: casting
        it to the expected dtype would let the test pass even when a rolling
        function returns the wrong dtype.  Only the hand-written expectation
        is converted to its declared dtype.
        """
        for f_name, d_name in product(self.funcs.keys(), self.data.keys()):
            f, res_dtype, exp_dtype = self.funcs[f_name]
            d = self.data[d_name]
            if isinstance(d, Series):
                assert_equal = assert_series_equal
            else:
                assert_equal = assert_frame_equal

            exp = self.expects[d_name][f_name]
            if exp_dtype:
                exp = exp.astype(com.pandas_dtype(exp_dtype))

            result = f(d.rolling(window=self.window))
            if res_dtype:
                # verify (rather than force) the declared result dtype so
                # a mismatch is reported with the function/fixture names
                want = com.pandas_dtype(res_dtype)
                if isinstance(result, Series):
                    got = [result.dtype]
                else:
                    got = list(result.dtypes)
                assert all(dt == want for dt in got), (
                    "%s on %s returned dtype(s) %s, expected %s"
                    % (f_name, d_name, got, want))
            assert_equal(result, exp)
|
||
|
||
# One subclass per input dtype; each only overrides ``dtype`` and inherits
# the fixtures and ``test_dtypes`` from TestDtype.
class TestDtype_object(TestDtype):
    dtype = object


class TestDtype_int8(TestDtype):
    dtype = np.int8


class TestDtype_int16(TestDtype):
    dtype = np.int16


class TestDtype_int32(TestDtype):
    dtype = np.int32


class TestDtype_int64(TestDtype):
    dtype = np.int64


class TestDtype_uint8(TestDtype):
    dtype = np.uint8


class TestDtype_uint16(TestDtype):
    dtype = np.uint16


class TestDtype_uint32(TestDtype):
    dtype = np.uint32


class TestDtype_uint64(TestDtype):
    dtype = np.uint64


class TestDtype_float16(TestDtype):
    dtype = np.float16


class TestDtype_float32(TestDtype):
    dtype = np.float32


class TestDtype_float64(TestDtype):
    dtype = np.float64
|
||
|
||
class TestDtype_category(TestDtype):
    """Rolling over categorical Series; no DataFrame fixture is built."""

    dtype = 'category'
    # NOTE(review): nothing in the visible code reads this flag; the 'df'
    # fixture is actually dropped by the _create_dtype_data override below.
    include_df = False

    def _create_dtype_data(self, dtype):
        """Build only the two Series fixtures (``'sr1'`` and ``'sr2'``)."""
        return {
            'sr1': Series(range(5), dtype=dtype),
            'sr2': Series(range(10, 0, -2), dtype=dtype),
        }
|
||
|
||
class TestDatetimeLikeDtype(TestDtype):
    """Datetime-like input dtypes; rolling over them is skipped for now.

    GH #12373: rolling functions raise ValueError on float32 data
    """

    dtype = np.dtype('M8[ns]')

    def _create_dtype_data(self, dtype):
        """Build no fixtures, so the inherited ``setUp`` never raises.

        Per the PR discussion, constructing the fixtures can itself fail
        for some datetime-like dtypes (reported for a tz-aware dtype), and
        set-up must remain a known starting point.
        """
        return {}

    def test_dtypes(self):
        # Skip inside the test rather than in setUp: the outcome is still a
        # skip, but set-up stays free of raised exceptions.
        raise nose.SkipTest("Skip rolling on DatetimeLike dtypes.")
|
||
|
||
# Further datetime-like dtypes; both inherit everything except ``dtype``
# from TestDatetimeLikeDtype.
class TestDtype_timedelta(TestDatetimeLikeDtype):
    dtype = np.dtype('m8[ns]')


class TestDtype_datetime64UTC(TestDatetimeLikeDtype):
    dtype = 'datetime64[ns, UTC]'
|
||
|
||
class TestMoments(Base): | ||
|
||
def setUp(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You shouldn't need this, as you are overriding it in `TestDatetimelikes`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do so because I see that `assertRaises` doesn't capture the error in `setUp()`, in which we construct the data; if the dtype is specified as `'M8[ns, UTC]'`, it will raise an error there. What's your suggestion to capture this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Override `create_data_types` in `Testdatetimelike`. `setUp` should NEVER raise; the point is that it's a known starting point. Really try to make this as simple as possible. I am happy for you to skip ALL Datetimelike tests ATM.