diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index bb6cb5a444dd9..3615cc3dc8ad8 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -842,6 +842,44 @@ def no_nans(x): _consistency_data = _create_consistency_data() class TestMomentsConsistency(Base): + base_functions = [ + (lambda v: Series(v).count(), None, 'count'), + (lambda v: Series(v).max(), None, 'max'), + (lambda v: Series(v).min(), None, 'min'), + (lambda v: Series(v).sum(), None, 'sum'), + (lambda v: Series(v).mean(), None, 'mean'), + (lambda v: Series(v).std(), 1, 'std'), + (lambda v: Series(v).cov(Series(v)), None, 'cov'), + (lambda v: Series(v).corr(Series(v)), None, 'corr'), + (lambda v: Series(v).var(), 1, 'var'), + #(lambda v: Series(v).skew(), 3, 'skew'), # restore once GH 8086 is fixed + #(lambda v: Series(v).kurt(), 4, 'kurt'), # restore once GH 8086 is fixed + #(lambda x, min_periods: mom.expanding_quantile(x, 0.3, min_periods=min_periods, 'quantile'), + # lambda v: Series(v).quantile(0.3), None, 'quantile'), # restore once GH 8084 is fixed + (lambda v: Series(v).median(), None ,'median'), + (np.nanmax, 1, 'max'), + (np.nanmin, 1, 'min'), + (np.nansum, 1, 'sum'), + ] + if np.__version__ >= LooseVersion('1.8.0'): + base_functions += [ + (np.nanmean, 1, 'mean'), + (lambda v: np.nanstd(v, ddof=1), 1 ,'std'), + (lambda v: np.nanvar(v, ddof=1), 1 ,'var'), + ] + if np.__version__ >= LooseVersion('1.9.0'): + base_functions += [ + (np.nanmedian, 1, 'median'), + ] + no_nan_functions = [ + (np.max, None, 'max'), + (np.min, None, 'min'), + (np.sum, None, 'sum'), + (np.mean, None, 'mean'), + (lambda v: np.std(v, ddof=1), 1 ,'std'), + (lambda v: np.var(v, ddof=1), 1 ,'var'), + (np.median, None, 'median'), + ] def _create_data(self): super(TestMomentsConsistency, self)._create_data() @@ -877,9 +915,11 @@ def _non_null_values(x): # self.assertTrue(_non_null_values(corr_x_x).issubset(set([1.]))) # restore once rolling_cov(x, x) is identically equal to var(x) if is_constant: + exp = x.max() if isinstance(x, Series) else x.max().max() + # check mean of constant series expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = x.max().max() + expected[count_x >= max(min_periods, 1)] = exp assert_equal(mean_x, expected) # check correlation of constant series with itself is NaN @@ -1030,44 +1070,6 @@ def _ewma(s, com, min_periods, adjust, ignore_na): @slow def test_expanding_consistency(self): - base_functions = [ - (mom.expanding_count, lambda v: Series(v).count(), None), - (mom.expanding_max, lambda v: Series(v).max(), None), - (mom.expanding_min, lambda v: Series(v).min(), None), - (mom.expanding_sum, lambda v: Series(v).sum(), None), - (mom.expanding_mean, lambda v: Series(v).mean(), None), - (mom.expanding_std, lambda v: Series(v).std(), 1), - (mom.expanding_cov, lambda v: Series(v).cov(Series(v)), None), - (mom.expanding_corr, lambda v: Series(v).corr(Series(v)), None), - (mom.expanding_var, lambda v: Series(v).var(), 1), - #(mom.expanding_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed - #(mom.expanding_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed - #(lambda x, min_periods: mom.expanding_quantile(x, 0.3, min_periods=min_periods), - # lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed - (mom.expanding_median, lambda v: Series(v).median(), None), - (mom.expanding_max, np.nanmax, 1), - (mom.expanding_min, np.nanmin, 1), - (mom.expanding_sum, np.nansum, 1), - ] - if np.__version__ >= LooseVersion('1.8.0'): - base_functions += [ - (mom.expanding_mean, np.nanmean, 1), - (mom.expanding_std, lambda v: np.nanstd(v, ddof=1), 1), - (mom.expanding_var, lambda v: np.nanvar(v, ddof=1), 1), - ] - if np.__version__ >= LooseVersion('1.9.0'): - base_functions += [ - (mom.expanding_median, np.nanmedian, 1), - ] - no_nan_functions = [ - (mom.expanding_max, np.max, None), - (mom.expanding_min, np.min, None), - (mom.expanding_sum, np.sum, None), - (mom.expanding_mean, np.mean, None), - (mom.expanding_std, lambda v: np.std(v, ddof=1), 1), - (mom.expanding_var, lambda v: np.var(v, ddof=1), 1), - (mom.expanding_median, np.median, None), - ] # suppress warnings about empty slices, as we are deliberately testing with empty/0-length Series/DataFrames with warnings.catch_warnings(): @@ -1095,12 +1097,14 @@ def test_expanding_consistency(self): # or (b) expanding_apply of np.nanxyz() for (x, is_constant, no_nans) in self.data: assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal - functions = base_functions + functions = self.base_functions # GH 8269 if no_nans: - functions = base_functions + no_nan_functions - for (expanding_f, f, require_min_periods) in functions: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + expanding_f = getattr(mom,'expanding_{0}'.format(name)) + if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): continue @@ -1113,7 +1117,9 @@ def test_expanding_consistency(self): else: expanding_f_result = expanding_f(x, min_periods=min_periods) expanding_apply_f_result = mom.expanding_apply(x, func=f, min_periods=min_periods) - assert_equal(expanding_f_result, expanding_apply_f_result) + + if not tm._incompat_bottleneck_version(name): + assert_equal(expanding_f_result, expanding_apply_f_result) if (expanding_f in [mom.expanding_cov, mom.expanding_corr]) and isinstance(x, DataFrame): # test pairwise=True @@ -1127,45 +1133,6 @@ def test_expanding_consistency(self): @slow def test_rolling_consistency(self): - base_functions = [ - (mom.rolling_count, lambda v: Series(v).count(), None), - (mom.rolling_max, lambda v: Series(v).max(), None), - (mom.rolling_min, lambda v: Series(v).min(), None), - (mom.rolling_sum, lambda v: Series(v).sum(), None), - (mom.rolling_mean, lambda v: Series(v).mean(), None), - (mom.rolling_std, lambda v: Series(v).std(), 1), - (mom.rolling_cov, lambda v: Series(v).cov(Series(v)), None), - (mom.rolling_corr, lambda v: Series(v).corr(Series(v)), None), - (mom.rolling_var, lambda v: Series(v).var(), 1), - #(mom.rolling_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed - #(mom.rolling_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed - #(lambda x, window, min_periods, center: mom.rolling_quantile(x, window, 0.3, min_periods=min_periods, center=center), - # lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed - (mom.rolling_median, lambda v: Series(v).median(), None), - (mom.rolling_max, np.nanmax, 1), - (mom.rolling_min, np.nanmin, 1), - (mom.rolling_sum, np.nansum, 1), - ] - if np.__version__ >= LooseVersion('1.8.0'): - base_functions += [ - (mom.rolling_mean, np.nanmean, 1), - (mom.rolling_std, lambda v: np.nanstd(v, ddof=1), 1), - (mom.rolling_var, lambda v: np.nanvar(v, ddof=1), 1), - ] - if np.__version__ >= LooseVersion('1.9.0'): - base_functions += [ - (mom.rolling_median, np.nanmedian, 1), - ] - no_nan_functions = [ - (mom.rolling_max, np.max, None), - (mom.rolling_min, np.min, None), - (mom.rolling_sum, np.sum, None), - (mom.rolling_mean, np.mean, None), - (mom.rolling_std, lambda v: np.std(v, ddof=1), 1), - (mom.rolling_var, lambda v: np.var(v, ddof=1), 1), - (mom.rolling_median, np.median, None), - ] - for window in [1, 2, 3, 10, 20]: for min_periods in set([0, 1, 2, 3, 4, window]): if min_periods and (min_periods > window): @@ -1195,11 +1162,14 @@ def test_rolling_consistency(self): for (x, is_constant, no_nans) in self.data: assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal - functions = base_functions + functions = self.base_functions + # GH 8269 if no_nans: - functions = base_functions + no_nan_functions - for (rolling_f, f, require_min_periods) in functions: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + rolling_f = getattr(mom,'rolling_{0}'.format(name)) + if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): continue @@ -1214,7 +1184,8 @@ def test_rolling_consistency(self): rolling_f_result = rolling_f(x, window=window, min_periods=min_periods, center=center) rolling_apply_f_result = mom.rolling_apply(x, window=window, func=f, min_periods=min_periods, center=center) - assert_equal(rolling_f_result, rolling_apply_f_result) + if not tm._incompat_bottleneck_version(name): + assert_equal(rolling_f_result, rolling_apply_f_result) if (rolling_f in [mom.rolling_cov, mom.rolling_corr]) and isinstance(x, DataFrame): # test pairwise=True diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index aea165b907c05..e07d6cc3d9b90 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -12471,7 +12471,9 @@ def test_stat_operators_attempt_obj_array(self): self.assertEqual(df.values.dtype, np.object_) result = getattr(df, meth)(1) expected = getattr(df.astype('f8'), meth)(1) - assert_series_equal(result, expected) + + if not tm._incompat_bottleneck_version(meth): + assert_series_equal(result, expected) def test_mean(self): self._check_stat_op('mean', np.mean, check_dates=True) @@ -12696,9 +12698,10 @@ def wrapper(x): assert_series_equal(result0, frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) - assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), - check_dtype=False, - check_less_precise=check_less_precise) + if not tm._incompat_bottleneck_version(name): + assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), + check_dtype=False, + check_less_precise=check_less_precise) # check dtypes if check_dtype: @@ -12727,8 +12730,9 @@ def wrapper(x): all_na = self.frame * np.NaN r0 = getattr(all_na, name)(axis=0) r1 = getattr(all_na, name)(axis=1) - self.assertTrue(np.isnan(r0).all()) - self.assertTrue(np.isnan(r1).all()) + if not tm._incompat_bottleneck_version(name): + self.assertTrue(np.isnan(r0).all()) + self.assertTrue(np.isnan(r1).all()) def test_mode(self): df = pd.DataFrame({"A": [12, 12, 11, 12, 19, 11], diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index ec6ab4e0d2ab1..f7b6f947d8924 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2014,7 +2014,10 @@ def test_cythonized_aggers(self): df = DataFrame(data) df.loc[2:10:2,'C'] = nan - def _testit(op): + def _testit(name): + + op = lambda x: getattr(x,name)() + # single column grouped = df.drop(['B'], axis=1).groupby('A') exp = {} @@ -2035,18 +2038,19 @@ def _testit(op): exp.name = 'C' result = op(grouped)['C'] - assert_series_equal(result, exp) - - _testit(lambda x: x.count()) - _testit(lambda x: x.sum()) - _testit(lambda x: x.std()) - _testit(lambda x: x.var()) - _testit(lambda x: x.sem()) - _testit(lambda x: x.mean()) - _testit(lambda x: x.median()) - _testit(lambda x: x.prod()) - _testit(lambda x: x.min()) - _testit(lambda x: x.max()) + if not tm._incompat_bottleneck_version(name): + assert_series_equal(result, exp) + + _testit('count') + _testit('sum') + _testit('std') + _testit('var') + _testit('sem') + _testit('mean') + _testit('median') + _testit('prod') + _testit('min') + _testit('max') def test_max_min_non_numeric(self): # #2700 diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index a903b76b3ac7f..fe56d5d1da6bd 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -9,12 +9,13 @@ import pandas.core.nanops as nanops import pandas.util.testing as tm -nanops._USE_BOTTLENECK = False - +use_bn = nanops._USE_BOTTLENECK class TestnanopsDataFrame(tm.TestCase): + def setUp(self): np.random.seed(11235) + nanops._USE_BOTTLENECK = False self.arr_shape = (11, 7, 5) @@ -116,6 +117,9 @@ def setUp(self): self.arr_float_nan_inf_1d = self.arr_float_nan_inf[:, 0, 0] self.arr_nan_nan_inf_1d = self.arr_nan_nan_inf[:, 0, 0] + def tearDown(self): + nanops._USE_BOTTLENECK = use_bn + def check_results(self, targ, res, axis): res = getattr(res, 'asm8', res) res = getattr(res, 'values', res) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 9cdc769dd7d74..64edf29915206 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -168,7 +168,8 @@ def wrapper(x): for i in range(obj.ndim): result = f(axis=i) - assert_frame_equal(result, obj.apply(skipna_wrapper, axis=i)) + if not tm._incompat_bottleneck_version(name): + assert_frame_equal(result, obj.apply(skipna_wrapper, axis=i)) self.assertRaises(Exception, f, axis=obj.ndim) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 289f7f134aa27..3772d4b9c272b 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -144,7 +144,8 @@ def wrapper(x): for i in range(obj.ndim): result = f(axis=i) - assert_panel_equal(result, obj.apply(skipna_wrapper, axis=i)) + if not tm._incompat_bottleneck_version(name): + assert_panel_equal(result, obj.apply(skipna_wrapper, axis=i)) self.assertRaises(Exception, f, axis=obj.ndim) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a195455c116fb..878bfdf3ac9fd 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -246,6 +246,38 @@ def _skip_if_python26(): import nose raise nose.SkipTest("skipping on python2.6") +def _incompat_bottleneck_version(method): + """ skip if we have bottleneck installed + and its >= 1.0 + as we don't match the nansum/nanprod behavior for all-nan + ops, see GH9422 + """ + if method not in ['sum','prod']: + return False + try: + import bottleneck as bn + return bn.__version__ >= LooseVersion('1.0') + except ImportError: + return False + +def skip_if_no_ne(engine='numexpr'): + import nose + _USE_NUMEXPR = pd.computation.expressions._USE_NUMEXPR + + if engine == 'numexpr': + try: + import numexpr as ne + except ImportError: + raise nose.SkipTest("numexpr not installed") + + if not _USE_NUMEXPR: + raise nose.SkipTest("numexpr disabled") + + if ne.__version__ < LooseVersion('2.0'): + raise nose.SkipTest("numexpr version too low: " + "%s" % ne.__version__) + + #------------------------------------------------------------------------------ # locale utilities @@ -1986,24 +2018,6 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always", % extra_warnings) -def skip_if_no_ne(engine='numexpr'): - import nose - _USE_NUMEXPR = pd.computation.expressions._USE_NUMEXPR - - if engine == 'numexpr': - try: - import numexpr as ne - except ImportError: - raise nose.SkipTest("numexpr not installed") - - if not _USE_NUMEXPR: - raise nose.SkipTest("numexpr disabled") - - if ne.__version__ < LooseVersion('2.0'): - raise nose.SkipTest("numexpr version too low: " - "%s" % ne.__version__) - - def disabled(t): t.disabled = True return t