From d23ac16b629800dd6898c807020535b6e91429a0 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 17 Sep 2018 02:57:05 +0200 Subject: [PATCH 01/14] Fixturize frame/test_analytics.py --- pandas/tests/frame/conftest.py | 29 ++ pandas/tests/frame/test_analytics.py | 466 +++++++++++++-------------- 2 files changed, 261 insertions(+), 234 deletions(-) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index fdedb93835d75..d797adbcc7c84 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -17,6 +17,20 @@ def float_frame(): return DataFrame(tm.getSeriesData()) +@pytest.fixture +def float_frame_with_na(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + df = DataFrame(tm.getSeriesData()) + # set some NAs + df.loc[5:10] = np.nan + df.loc[15:20, -2:] = np.nan + return df + + @pytest.fixture def float_frame2(): """ @@ -27,6 +41,21 @@ def float_frame2(): return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A']) +@pytest.fixture +def bool_frame_with_na(): + """ + Fixture for DataFrame of booleans with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + df = DataFrame(tm.getSeriesData()) > 0 + df = df.astype(object) + # set some NAs + df.loc[5:10] = np.nan + df.loc[15:20, -2:] = np.nan + return df + + @pytest.fixture def int_frame(): """ diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index baebf414969be..84f42cf43448e 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -23,54 +23,47 @@ import pandas.util.testing as tm import pandas.util._test_decorators as td -from pandas.tests.frame.common import TestData -class TestDataFrameAnalytics(TestData): +class TestDataFrameAnalytics(): # ---------------------------------------------------------------------= # Correlation and covariance @td.skip_if_no_scipy - def test_corr_pearson(self): - self.frame['A'][:5] = nan - self.frame['B'][5:10] = nan + def test_corr_pearson(self, float_frame): + float_frame['A'][:5] = nan + float_frame['B'][5:10] = nan - self._check_method('pearson') + self._check_method(float_frame, 'pearson') @td.skip_if_no_scipy - def test_corr_kendall(self): - self.frame['A'][:5] = nan - self.frame['B'][5:10] = nan + def test_corr_kendall(self, float_frame): + float_frame['A'][:5] = nan + float_frame['B'][5:10] = nan - self._check_method('kendall') + self._check_method(float_frame, 'kendall') @td.skip_if_no_scipy - def test_corr_spearman(self): - self.frame['A'][:5] = nan - self.frame['B'][5:10] = nan + def test_corr_spearman(self, float_frame): + float_frame['A'][:5] = nan + float_frame['B'][5:10] = nan - self._check_method('spearman') + self._check_method(float_frame, 'spearman') - def _check_method(self, method='pearson', check_minp=False): - if not check_minp: - correls = self.frame.corr(method=method) - exp = self.frame['A'].corr(self.frame['C'], method=method) - tm.assert_almost_equal(correls['A']['C'], exp) - else: - result = self.frame.corr(min_periods=len(self.frame) - 8) - expected = self.frame.corr() - expected.loc['A', 'B'] = expected.loc['B', 'A'] = nan - tm.assert_frame_equal(result, expected) + def _check_method(self, frame, method='pearson'): + correls = frame.corr(method=method) + exp = frame['A'].corr(frame['C'], method=method) + tm.assert_almost_equal(correls['A']['C'], exp) @td.skip_if_no_scipy - def test_corr_non_numeric(self): - self.frame['A'][:5] = nan - self.frame['B'][5:10] = nan + def test_corr_non_numeric(self, float_frame, float_string_frame): + float_frame['A'][:5] = nan + float_frame['B'][5:10] = nan # exclude non-numeric types - result = self.mixed_frame.corr() - expected = self.mixed_frame.loc[:, ['A', 'B', 'C', 'D']].corr() + result = float_string_frame.corr() + expected = float_string_frame.loc[:, ['A', 'B', 'C', 'D']].corr() tm.assert_frame_equal(result, expected) @td.skip_if_no_scipy @@ -138,36 +131,36 @@ def test_corr_invalid_method(self): with tm.assert_raises_regex(ValueError, msg): df.corr(method="____") - def test_cov(self): + def test_cov(self, float_frame, float_string_frame): # min_periods no NAs (corner case) - expected = self.frame.cov() - result = self.frame.cov(min_periods=len(self.frame)) + expected = float_frame.cov() + result = float_frame.cov(min_periods=len(float_frame)) tm.assert_frame_equal(expected, result) - result = self.frame.cov(min_periods=len(self.frame) + 1) + result = float_frame.cov(min_periods=len(float_frame) + 1) assert isna(result.values).all() # with NAs - frame = self.frame.copy() + frame = float_frame.copy() frame['A'][:5] = nan frame['B'][5:10] = nan - result = self.frame.cov(min_periods=len(self.frame) - 8) - expected = self.frame.cov() + result = float_frame.cov(min_periods=len(float_frame) - 8) + expected = float_frame.cov() expected.loc['A', 'B'] = np.nan expected.loc['B', 'A'] = np.nan # regular - self.frame['A'][:5] = nan - self.frame['B'][:10] = nan - cov = self.frame.cov() + float_frame['A'][:5] = nan + float_frame['B'][:10] = nan + cov = float_frame.cov() tm.assert_almost_equal(cov['A']['C'], - self.frame['A'].cov(self.frame['C'])) + float_frame['A'].cov(float_frame['C'])) # exclude non-numeric types - result = self.mixed_frame.cov() - expected = self.mixed_frame.loc[:, ['A', 'B', 'C', 'D']].cov() + result = float_string_frame.cov() + expected = float_string_frame.loc[:, ['A', 'B', 'C', 'D']].cov() tm.assert_frame_equal(result, expected) # Single column frame @@ -182,11 +175,11 @@ def test_cov(self): index=df.columns, columns=df.columns) tm.assert_frame_equal(result, expected) - def test_corrwith(self): - a = self.tsframe + def test_corrwith(self, datetime_frame): + a = datetime_frame noise = Series(randn(len(a)), index=a.index) - b = self.tsframe.add(noise, axis=0) + b = datetime_frame.add(noise, axis=0) # make sure order does not matter b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:]) @@ -231,9 +224,9 @@ def test_corrwith_with_objects(self): expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1) tm.assert_series_equal(result, expected) - def test_corrwith_series(self): - result = self.tsframe.corrwith(self.tsframe['A']) - expected = self.tsframe.apply(self.tsframe['A'].corr) + def test_corrwith_series(self, datetime_frame): + result = datetime_frame.corrwith(datetime_frame['A']) + expected = datetime_frame.apply(datetime_frame['A'].corr) tm.assert_series_equal(result, expected) @@ -460,12 +453,11 @@ def test_reduce_mixed_frame(self): np.array([2, 150, 'abcde'], dtype=object)) tm.assert_series_equal(test, df.T.sum(axis=1)) - def test_count(self): + def test_count(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: notna(s).sum() - self._check_stat_op('count', f, - has_skipna=False, - has_numeric_only=True, - check_dtype=False, + self._check_stat_op('count', f, float_frame_with_na, float_frame, + float_string_frame, has_skipna=False, + has_numeric_only=True, check_dtype=False, check_dates=True) # corner case @@ -492,9 +484,11 @@ def test_count(self): expected = Series(0, index=[]) tm.assert_series_equal(result, expected) - def test_nunique(self): + def test_nunique(self, float_frame_with_na, float_frame, + float_string_frame): f = lambda s: len(algorithms.unique1d(s.dropna())) - self._check_stat_op('nunique', f, has_skipna=False, + self._check_stat_op('nunique', f, float_frame_with_na, + float_frame, float_string_frame, has_skipna=False, check_dtype=False, check_dates=True) df = DataFrame({'A': [1, 1, 1], @@ -507,19 +501,20 @@ def test_nunique(self): tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2})) - def test_sum(self): - self._check_stat_op('sum', np.sum, has_numeric_only=True, + def test_sum(self, float_frame_with_na, mixed_float_frame, + float_frame, float_string_frame): + self._check_stat_op('sum', np.sum, float_frame_with_na, float_frame, + float_string_frame, has_numeric_only=True, skipna_alternative=np.nansum) # mixed types (with upcasting happening) self._check_stat_op('sum', np.sum, - frame=self.mixed_float.astype('float32'), - has_numeric_only=True, check_dtype=False, - check_less_precise=True) + mixed_float_frame.astype('float32'), float_frame, + float_string_frame, has_numeric_only=True, + check_dtype=False, check_less_precise=True) - @pytest.mark.parametrize( - "method", ['sum', 'mean', 'prod', 'var', - 'std', 'skew', 'min', 'max']) + @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', + 'std', 'skew', 'min', 'max']) def test_stat_operators_attempt_obj_array(self, method): # GH #676 data = { @@ -529,8 +524,7 @@ def test_stat_operators_attempt_obj_array(self, method): 'c': [0.00031111847529610595, 0.0014902627951905339, -0.00094099200035979691] } - df1 = DataFrame(data, index=['foo', 'bar', 'baz'], - dtype='O') + df1 = DataFrame(data, index=['foo', 'bar', 'baz'], dtype='O') df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object) @@ -543,41 +537,50 @@ def test_stat_operators_attempt_obj_array(self, method): if method in ['sum', 'prod']: tm.assert_series_equal(result, expected) - def test_mean(self): - self._check_stat_op('mean', np.mean, check_dates=True) + def test_mean(self, float_frame_with_na, float_frame, float_string_frame): + self._check_stat_op('mean', np.mean, float_frame_with_na, + float_frame, float_string_frame, check_dates=True) - def test_product(self): - self._check_stat_op('product', np.prod) + def test_product(self, float_frame_with_na, float_frame, + float_string_frame): + self._check_stat_op('product', np.prod, float_frame_with_na, + float_frame, float_string_frame) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median(self): + def test_median(self, float_frame_with_na, float_frame, + float_string_frame): def wrapper(x): if isna(x).any(): return np.nan return np.median(x) - self._check_stat_op('median', wrapper, check_dates=True) + self._check_stat_op('median', wrapper, float_frame_with_na, + float_frame, float_string_frame, check_dates=True) - def test_min(self): + def test_min(self, float_frame_with_na, int_frame, + float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - self._check_stat_op('min', np.min, check_dates=True) - self._check_stat_op('min', np.min, frame=self.intframe) + self._check_stat_op('min', np.min, float_frame_with_na, + float_frame, float_string_frame, + check_dates=True) + self._check_stat_op('min', np.min, int_frame, float_frame, + float_string_frame) - def test_cummin(self): - self.tsframe.loc[5:10, 0] = nan - self.tsframe.loc[10:15, 1] = nan - self.tsframe.loc[15:, 2] = nan + def test_cummin(self, datetime_frame): + datetime_frame.loc[5:10, 0] = nan + datetime_frame.loc[10:15, 1] = nan + datetime_frame.loc[15:, 2] = nan # axis = 0 - cummin = self.tsframe.cummin() - expected = self.tsframe.apply(Series.cummin) + cummin = datetime_frame.cummin() + expected = datetime_frame.apply(Series.cummin) tm.assert_frame_equal(cummin, expected) # axis = 1 - cummin = self.tsframe.cummin(axis=1) - expected = self.tsframe.apply(Series.cummin, axis=1) + cummin = datetime_frame.cummin(axis=1) + expected = datetime_frame.apply(Series.cummin, axis=1) tm.assert_frame_equal(cummin, expected) # it works @@ -585,22 +588,22 @@ def test_cummin(self): result = df.cummin() # noqa # fix issue - cummin_xs = self.tsframe.cummin(axis=1) - assert np.shape(cummin_xs) == np.shape(self.tsframe) + cummin_xs = datetime_frame.cummin(axis=1) + assert np.shape(cummin_xs) == np.shape(datetime_frame) - def test_cummax(self): - self.tsframe.loc[5:10, 0] = nan - self.tsframe.loc[10:15, 1] = nan - self.tsframe.loc[15:, 2] = nan + def test_cummax(self, datetime_frame): + datetime_frame.loc[5:10, 0] = nan + datetime_frame.loc[10:15, 1] = nan + datetime_frame.loc[15:, 2] = nan # axis = 0 - cummax = self.tsframe.cummax() - expected = self.tsframe.apply(Series.cummax) + cummax = datetime_frame.cummax() + expected = datetime_frame.apply(Series.cummax) tm.assert_frame_equal(cummax, expected) # axis = 1 - cummax = self.tsframe.cummax(axis=1) - expected = self.tsframe.apply(Series.cummax, axis=1) + cummax = datetime_frame.cummax(axis=1) + expected = datetime_frame.apply(Series.cummax, axis=1) tm.assert_frame_equal(cummax, expected) # it works @@ -608,32 +611,40 @@ def test_cummax(self): result = df.cummax() # noqa # fix issue - cummax_xs = self.tsframe.cummax(axis=1) - assert np.shape(cummax_xs) == np.shape(self.tsframe) + cummax_xs = datetime_frame.cummax(axis=1) + assert np.shape(cummax_xs) == np.shape(datetime_frame) - def test_max(self): + def test_max(self, float_frame_with_na, int_frame, + float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - self._check_stat_op('max', np.max, check_dates=True) - self._check_stat_op('max', np.max, frame=self.intframe) + self._check_stat_op('max', np.max, float_frame_with_na, + float_frame, float_string_frame, + check_dates=True) + self._check_stat_op('max', np.max, int_frame, float_frame, + float_string_frame) - def test_mad(self): + def test_mad(self, float_frame_with_na, float_frame, float_string_frame): f = lambda x: np.abs(x - x.mean()).mean() - self._check_stat_op('mad', f) + self._check_stat_op('mad', f, float_frame_with_na, float_frame, + float_string_frame) - def test_var_std(self): + def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, + float_string_frame): alt = lambda x: np.var(x, ddof=1) - self._check_stat_op('var', alt) + self._check_stat_op('var', alt, float_frame_with_na, float_frame, + float_string_frame) alt = lambda x: np.std(x, ddof=1) - self._check_stat_op('std', alt) + self._check_stat_op('std', alt, float_frame_with_na, float_frame, + float_string_frame) - result = self.tsframe.std(ddof=4) - expected = self.tsframe.apply(lambda x: x.std(ddof=4)) + result = datetime_frame.std(ddof=4) + expected = datetime_frame.apply(lambda x: x.std(ddof=4)) tm.assert_almost_equal(result, expected) - result = self.tsframe.var(ddof=4) - expected = self.tsframe.apply(lambda x: x.var(ddof=4)) + result = datetime_frame.var(ddof=4) + expected = datetime_frame.apply(lambda x: x.var(ddof=4)) tm.assert_almost_equal(result, expected) arr = np.repeat(np.random.random((1, 1000)), 1000, 0) @@ -685,19 +696,19 @@ def test_mixed_ops(self, op): result = getattr(df, op)() assert len(result) == 2 - def test_cumsum(self): - self.tsframe.loc[5:10, 0] = nan - self.tsframe.loc[10:15, 1] = nan - self.tsframe.loc[15:, 2] = nan + def test_cumsum(self, datetime_frame): + datetime_frame.loc[5:10, 0] = nan + datetime_frame.loc[10:15, 1] = nan + datetime_frame.loc[15:, 2] = nan # axis = 0 - cumsum = self.tsframe.cumsum() - expected = self.tsframe.apply(Series.cumsum) + cumsum = datetime_frame.cumsum() + expected = datetime_frame.apply(Series.cumsum) tm.assert_frame_equal(cumsum, expected) # axis = 1 - cumsum = self.tsframe.cumsum(axis=1) - expected = self.tsframe.apply(Series.cumsum, axis=1) + cumsum = datetime_frame.cumsum(axis=1) + expected = datetime_frame.apply(Series.cumsum, axis=1) tm.assert_frame_equal(cumsum, expected) # works @@ -705,44 +716,46 @@ def test_cumsum(self): result = df.cumsum() # noqa # fix issue - cumsum_xs = self.tsframe.cumsum(axis=1) - assert np.shape(cumsum_xs) == np.shape(self.tsframe) + cumsum_xs = datetime_frame.cumsum(axis=1) + assert np.shape(cumsum_xs) == np.shape(datetime_frame) - def test_cumprod(self): - self.tsframe.loc[5:10, 0] = nan - self.tsframe.loc[10:15, 1] = nan - self.tsframe.loc[15:, 2] = nan + def test_cumprod(self, datetime_frame): + datetime_frame.loc[5:10, 0] = nan + datetime_frame.loc[10:15, 1] = nan + datetime_frame.loc[15:, 2] = nan # axis = 0 - cumprod = self.tsframe.cumprod() - expected = self.tsframe.apply(Series.cumprod) + cumprod = datetime_frame.cumprod() + expected = datetime_frame.apply(Series.cumprod) tm.assert_frame_equal(cumprod, expected) # axis = 1 - cumprod = self.tsframe.cumprod(axis=1) - expected = self.tsframe.apply(Series.cumprod, axis=1) + cumprod = datetime_frame.cumprod(axis=1) + expected = datetime_frame.apply(Series.cumprod, axis=1) tm.assert_frame_equal(cumprod, expected) # fix issue - cumprod_xs = self.tsframe.cumprod(axis=1) - assert np.shape(cumprod_xs) == np.shape(self.tsframe) + cumprod_xs = datetime_frame.cumprod(axis=1) + assert np.shape(cumprod_xs) == np.shape(datetime_frame) # ints - df = self.tsframe.fillna(0).astype(int) + df = datetime_frame.fillna(0).astype(int) df.cumprod(0) df.cumprod(1) # ints32 - df = self.tsframe.fillna(0).astype(np.int32) + df = datetime_frame.fillna(0).astype(np.int32) df.cumprod(0) df.cumprod(1) - def test_sem(self): + def test_sem(self, float_frame_with_na, datetime_frame, + float_frame, float_string_frame): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) - self._check_stat_op('sem', alt) + self._check_stat_op('sem', alt, float_frame_with_na, + float_frame, float_string_frame) - result = self.tsframe.sem(ddof=4) - expected = self.tsframe.apply( + result = datetime_frame.sem(ddof=4) + expected = datetime_frame.apply( lambda x: x.std(ddof=4) / np.sqrt(len(x))) tm.assert_almost_equal(result, expected) @@ -755,7 +768,7 @@ def test_sem(self): assert not (result < 0).any() @td.skip_if_no_scipy - def test_skew(self): + def test_skew(self, float_frame_with_na, float_frame, float_string_frame): from scipy.stats import skew def alt(x): @@ -763,10 +776,11 @@ def alt(x): return np.nan return skew(x, bias=False) - self._check_stat_op('skew', alt) + self._check_stat_op('skew', alt, float_frame_with_na, + float_frame, float_string_frame) @td.skip_if_no_scipy - def test_kurt(self): + def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): from scipy.stats import kurtosis def alt(x): @@ -774,7 +788,8 @@ def alt(x): return np.nan return kurtosis(x, bias=False) - self._check_stat_op('kurt', alt) + self._check_stat_op('kurt', alt, float_frame_with_na, + float_frame, float_string_frame) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], @@ -788,17 +803,13 @@ def alt(x): assert kurt.name is None assert kurt2.name == 'bar' - def _check_stat_op(self, name, alternative, frame=None, has_skipna=True, + def _check_stat_op(self, name, alternative, main_frame, float_frame, + float_string_frame, has_skipna=True, has_numeric_only=False, check_dtype=True, check_dates=False, check_less_precise=False, skipna_alternative=None): - if frame is None: - frame = self.frame - # set some NAs - frame.loc[5:10] = np.nan - frame.loc[15:20, -2:] = np.nan - f = getattr(frame, name) + f = getattr(main_frame, name) if check_dates: df = DataFrame({'b': date_range('1/1/2001', periods=2)}) @@ -819,52 +830,47 @@ def wrapper(x): skipna_alternative) result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, frame.apply(wrapper), + tm.assert_series_equal(result0, main_frame.apply(wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) # HACK: win32 - tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), + tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), check_dtype=False, check_less_precise=check_less_precise) else: skipna_wrapper = alternative - wrapper = alternative result0 = f(axis=0) result1 = f(axis=1) - tm.assert_series_equal(result0, frame.apply(skipna_wrapper), + tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) if name in ['sum', 'prod']: - exp = frame.apply(skipna_wrapper, axis=1) + exp = main_frame.apply(skipna_wrapper, axis=1) tm.assert_series_equal(result1, exp, check_dtype=False, check_less_precise=check_less_precise) # check dtypes if check_dtype: - lcd_dtype = frame.values.dtype + lcd_dtype = main_frame.values.dtype assert lcd_dtype == result0.dtype assert lcd_dtype == result1.dtype - # result = f(axis=1) - # comp = frame.apply(alternative, axis=1).reindex(result.index) - # assert_series_equal(result, comp) - # bad axis tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) # make sure works on mixed-type frame - getattr(self.mixed_frame, name)(axis=0) - getattr(self.mixed_frame, name)(axis=1) + getattr(float_string_frame, name)(axis=0) + getattr(float_string_frame, name)(axis=1) if has_numeric_only: - getattr(self.mixed_frame, name)(axis=0, numeric_only=True) - getattr(self.mixed_frame, name)(axis=1, numeric_only=True) - getattr(self.frame, name)(axis=0, numeric_only=False) - getattr(self.frame, name)(axis=1, numeric_only=False) + getattr(float_string_frame, name)(axis=0, numeric_only=True) + getattr(float_string_frame, name)(axis=1, numeric_only=True) + getattr(float_frame, name)(axis=0, numeric_only=False) + getattr(float_frame, name)(axis=1, numeric_only=False) # all NA case if has_skipna: - all_na = self.frame * np.NaN + all_na = float_frame * np.NaN r0 = getattr(all_na, name)(axis=0) r1 = getattr(all_na, name)(axis=1) if name in ['sum', 'prod']: @@ -1022,9 +1028,9 @@ def test_operators_timedelta64(self): assert df['off1'].dtype == 'timedelta64[ns]' assert df['off2'].dtype == 'timedelta64[ns]' - def test_sum_corner(self): - axis0 = self.empty.sum(0) - axis1 = self.empty.sum(1) + def test_sum_corner(self, empty_frame): + axis0 = empty_frame.sum(0) + axis1 = empty_frame.sum(1) assert isinstance(axis0, Series) assert isinstance(axis1, Series) assert len(axis0) == 0 @@ -1090,59 +1096,60 @@ def test_sum_nanops_timedelta(self): expected = pd.Series([0, 0, np.nan], dtype='m8[ns]', index=idx) tm.assert_series_equal(result, expected) - def test_sum_object(self): - values = self.frame.values.astype(int) - frame = DataFrame(values, index=self.frame.index, - columns=self.frame.columns) + def test_sum_object(self, float_frame): + values = float_frame.values.astype(int) + frame = DataFrame(values, index=float_frame.index, + columns=float_frame.columns) deltas = frame * timedelta(1) deltas.sum() - def test_sum_bool(self): + def test_sum_bool(self, float_frame): # ensure this works, bug report - bools = np.isnan(self.frame) + bools = np.isnan(float_frame) bools.sum(1) bools.sum(0) - def test_mean_corner(self): + def test_mean_corner(self, float_frame, float_string_frame): # unit test when have object data - the_mean = self.mixed_frame.mean(axis=0) - the_sum = self.mixed_frame.sum(axis=0, numeric_only=True) + the_mean = float_string_frame.mean(axis=0) + the_sum = float_string_frame.sum(axis=0, numeric_only=True) tm.assert_index_equal(the_sum.index, the_mean.index) - assert len(the_mean.index) < len(self.mixed_frame.columns) + assert len(the_mean.index) < len(float_string_frame.columns) # xs sum mixed type, just want to know it works... - the_mean = self.mixed_frame.mean(axis=1) - the_sum = self.mixed_frame.sum(axis=1, numeric_only=True) + the_mean = float_string_frame.mean(axis=1) + the_sum = float_string_frame.sum(axis=1, numeric_only=True) tm.assert_index_equal(the_sum.index, the_mean.index) # take mean of boolean column - self.frame['bool'] = self.frame['A'] > 0 - means = self.frame.mean(0) - assert means['bool'] == self.frame['bool'].values.mean() + float_frame['bool'] = float_frame['A'] > 0 + means = float_frame.mean(0) + assert means['bool'] == float_frame['bool'].values.mean() - def test_stats_mixed_type(self): + def test_stats_mixed_type(self, float_string_frame): # don't blow up - self.mixed_frame.std(1) - self.mixed_frame.var(1) - self.mixed_frame.mean(1) - self.mixed_frame.skew(1) + float_string_frame.std(1) + float_string_frame.var(1) + float_string_frame.mean(1) + float_string_frame.skew(1) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median_corner(self): + def test_median_corner(self, int_frame, float_frame, float_string_frame): def wrapper(x): if isna(x).any(): return np.nan return np.median(x) - self._check_stat_op('median', wrapper, frame=self.intframe, - check_dtype=False, check_dates=True) + self._check_stat_op('median', wrapper, int_frame, float_frame, + float_string_frame, check_dtype=False, + check_dates=True) # Miscellanea - def test_count_objects(self): - dm = DataFrame(self.mixed_frame._series) - df = DataFrame(self.mixed_frame._series) + def test_count_objects(self, float_string_frame): + dm = DataFrame(float_string_frame._series) + df = DataFrame(float_string_frame._series) tm.assert_series_equal(dm.count(), df.count()) tm.assert_series_equal(dm.count(1), df.count(1)) @@ -1160,13 +1167,13 @@ def test_sum_bools(self): # Index of max / min - def test_idxmin(self): - frame = self.frame + def test_idxmin(self, float_frame, int_frame): + frame = float_frame frame.loc[5:10] = np.nan frame.loc[15:20, -2:] = np.nan for skipna in [True, False]: for axis in [0, 1]: - for df in [frame, self.intframe]: + for df in [frame, int_frame]: result = df.idxmin(axis=axis, skipna=skipna) expected = df.apply(Series.idxmin, axis=axis, skipna=skipna) @@ -1174,13 +1181,13 @@ def test_idxmin(self): pytest.raises(ValueError, frame.idxmin, axis=2) - def test_idxmax(self): - frame = self.frame + def test_idxmax(self, float_frame, int_frame): + frame = float_frame frame.loc[5:10] = np.nan frame.loc[15:20, -2:] = np.nan for skipna in [True, False]: for axis in [0, 1]: - for df in [frame, self.intframe]: + for df in [frame, int_frame]: result = df.idxmax(axis=axis, skipna=skipna) expected = df.apply(Series.idxmax, axis=axis, skipna=skipna) @@ -1191,9 +1198,13 @@ def test_idxmax(self): # ---------------------------------------------------------------------- # Logical reductions - def test_any_all(self): - self._check_bool_op('any', np.any, has_skipna=True, has_bool_only=True) - self._check_bool_op('all', np.all, has_skipna=True, has_bool_only=True) + def test_any_all(self, bool_frame_with_na, float_string_frame): + self._check_bool_op('any', np.any, bool_frame_with_na, + float_string_frame, has_skipna=True, + has_bool_only=True) + self._check_bool_op('all', np.all, bool_frame_with_na, + float_string_frame, has_skipna=True, + has_bool_only=True) def test_any_all_extra(self): df = DataFrame({ @@ -1325,15 +1336,8 @@ def test_any_all_level_axis_none_raises(self, method): with tm.assert_raises_regex(ValueError, xpr): getattr(df, method)(axis=None, level='out') - def _check_bool_op(self, name, alternative, frame=None, has_skipna=True, - has_bool_only=False): - if frame is None: - frame = self.frame > 0 - # set some NAs - frame = DataFrame(frame.values.astype(object), frame.index, - frame.columns) - frame.loc[5:10] = np.nan - frame.loc[15:20, -2:] = np.nan + def _check_bool_op(self, name, alternative, frame, float_string_frame, + has_skipna=True, has_bool_only=False): f = getattr(frame, name) @@ -1360,15 +1364,11 @@ def wrapper(x): tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), check_dtype=False) - # result = f(axis=1) - # comp = frame.apply(alternative, axis=1).reindex(result.index) - # assert_series_equal(result, comp) - # bad axis pytest.raises(ValueError, f, axis=2) # make sure works on mixed-type frame - mixed = self.mixed_frame + mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0 getattr(mixed, name)(axis=0) getattr(mixed, name)(axis=1) @@ -1746,34 +1746,34 @@ def test_pct_change(self): # Clip - def test_clip(self): - median = self.frame.median().median() - original = self.frame.copy() + def test_clip(self, float_frame): + median = float_frame.median().median() + original = float_frame.copy() - capped = self.frame.clip_upper(median) + capped = float_frame.clip_upper(median) assert not (capped.values > median).any() - floored = self.frame.clip_lower(median) + floored = float_frame.clip_lower(median) assert not (floored.values < median).any() - double = self.frame.clip(upper=median, lower=median) + double = float_frame.clip(upper=median, lower=median) assert not (double.values != median).any() - # Verify that self.frame was not changed inplace - assert (self.frame.values == original.values).all() + # Verify that float_frame was not changed inplace + assert (float_frame.values == original.values).all() - def test_inplace_clip(self): + def test_inplace_clip(self, float_frame): # GH #15388 - median = self.frame.median().median() - frame_copy = self.frame.copy() + median = float_frame.median().median() + frame_copy = float_frame.copy() frame_copy.clip_upper(median, inplace=True) assert not (frame_copy.values > median).any() - frame_copy = self.frame.copy() + frame_copy = float_frame.copy() frame_copy.clip_lower(median, inplace=True) assert not (frame_copy.values < median).any() - frame_copy = self.frame.copy() + frame_copy = float_frame.copy() frame_copy.clip(upper=median, lower=median, inplace=True) assert not (frame_copy.values != median).any() @@ -1839,9 +1839,10 @@ def test_clip_against_series(self, inplace): (0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]), (1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]]) ]) - def test_clip_against_list_like(self, inplace, lower, axis, res): + def test_clip_against_list_like(self, simple_frame, + inplace, lower, axis, res): # GH #15390 - original = self.simple.copy(deep=True) + original = simple_frame.copy(deep=True) result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace) @@ -1869,12 +1870,12 @@ def test_clip_against_frame(self, axis): tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) tm.assert_frame_equal(clipped_df[mask], df[mask]) - def test_clip_with_na_args(self): + def test_clip_with_na_args(self, float_frame): """Should process np.nan argument as None """ # GH # 17276 - tm.assert_frame_equal(self.frame.clip(np.nan), self.frame) - tm.assert_frame_equal(self.frame.clip(upper=np.nan, lower=np.nan), - self.frame) + tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) + tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), + float_frame) # GH #19992 df = DataFrame({'col_0': [1, 2, 3], 'col_1': [4, 5, 6], @@ -2084,9 +2085,6 @@ def test_n_error(self, df_main_dtypes, nselect_method, columns): col = columns[1] error_msg = self.dtype_error_msg_template.format( column=col, method=nselect_method, dtype=df[col].dtype) - # escape some characters that may be in the repr - error_msg = (error_msg.replace('(', '\\(').replace(")", "\\)") - .replace("[", "\\[").replace("]", "\\]")) with tm.assert_raises_regex(TypeError, error_msg): getattr(df, nselect_method)(2, columns) From 485e0d8c70fc5f33cb2672942e0e48543d8fa1da Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 18 Sep 2018 10:27:33 +0200 Subject: [PATCH 02/14] Review (WillAyd) --- pandas/tests/frame/test_analytics.py | 34 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 84f42cf43448e..92f0c71d19cd1 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -53,8 +53,8 @@ def test_corr_spearman(self, float_frame): def _check_method(self, frame, method='pearson'): correls = frame.corr(method=method) - exp = frame['A'].corr(frame['C'], method=method) - tm.assert_almost_equal(correls['A']['C'], exp) + expected = frame['A'].corr(frame['C'], method=method) + tm.assert_almost_equal(correls['A']['C'], expected) @td.skip_if_no_scipy def test_corr_non_numeric(self, float_frame, float_string_frame): @@ -803,8 +803,9 @@ def alt(x): assert kurt.name is None assert kurt2.name == 'bar' - def _check_stat_op(self, name, alternative, main_frame, float_frame, - float_string_frame, has_skipna=True, + # underscores added to distinguish argument names from fixture names + def _check_stat_op(self, name, alternative, main_frame, float_frame_, + float_string_frame_, has_skipna=True, has_numeric_only=False, check_dtype=True, check_dates=False, check_less_precise=False, skipna_alternative=None): @@ -846,8 +847,8 @@ def wrapper(x): check_dtype=check_dtype, check_less_precise=check_less_precise) if name in ['sum', 'prod']: - exp = main_frame.apply(skipna_wrapper, axis=1) - tm.assert_series_equal(result1, exp, check_dtype=False, + expected = main_frame.apply(skipna_wrapper, axis=1) + tm.assert_series_equal(result1, expected, check_dtype=False, check_less_precise=check_less_precise) # check dtypes @@ -859,18 +860,18 @@ def wrapper(x): # bad axis tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) # make sure works on mixed-type frame - getattr(float_string_frame, name)(axis=0) - getattr(float_string_frame, name)(axis=1) + getattr(float_string_frame_, name)(axis=0) + getattr(float_string_frame_, name)(axis=1) if has_numeric_only: - getattr(float_string_frame, name)(axis=0, numeric_only=True) - getattr(float_string_frame, name)(axis=1, numeric_only=True) - getattr(float_frame, name)(axis=0, numeric_only=False) - getattr(float_frame, name)(axis=1, numeric_only=False) + getattr(float_string_frame_, name)(axis=0, numeric_only=True) + getattr(float_string_frame_, name)(axis=1, numeric_only=True) + getattr(float_frame_, name)(axis=0, numeric_only=False) + getattr(float_frame_, name)(axis=1, numeric_only=False) # all NA case if has_skipna: - all_na = float_frame * np.NaN + all_na = float_frame_ * np.NaN r0 = getattr(all_na, name)(axis=0) r1 = getattr(all_na, name)(axis=1) if name in ['sum', 'prod']: @@ -1920,8 +1921,8 @@ def test_dot(self): row = a.iloc[0].values result = a.dot(row) - exp = a.dot(a.iloc[0]) - tm.assert_series_equal(result, exp) + expected = a.dot(a.iloc[0]) + tm.assert_series_equal(result, expected) with tm.assert_raises_regex(ValueError, 'Dot product shape mismatch'): @@ -2085,6 +2086,9 @@ def test_n_error(self, df_main_dtypes, nselect_method, columns): col = columns[1] error_msg = self.dtype_error_msg_template.format( column=col, method=nselect_method, dtype=df[col].dtype) + # escape some characters that may be in the repr + error_msg = (error_msg.replace('(', '\\(').replace(")", "\\)") + .replace("[", "\\[").replace("]", "\\]")) with tm.assert_raises_regex(TypeError, error_msg): getattr(df, nselect_method)(2, columns) From f1a394ac126a37dd60e4e77c715559c53dbf2b48 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 25 Sep 2018 18:33:51 +0200 Subject: [PATCH 03/14] Revert disambiguating underscores --- pandas/tests/frame/test_analytics.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 92f0c71d19cd1..e9f7f24634c98 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -803,9 +803,8 @@ def alt(x): assert kurt.name is None assert kurt2.name == 'bar' - # underscores added to distinguish argument names from fixture names - def _check_stat_op(self, name, alternative, main_frame, float_frame_, - float_string_frame_, has_skipna=True, + def _check_stat_op(self, name, alternative, main_frame, float_frame, + float_string_frame, has_skipna=True, has_numeric_only=False, check_dtype=True, check_dates=False, check_less_precise=False, skipna_alternative=None): @@ -860,18 +859,18 @@ def wrapper(x): # bad axis tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) # make sure works on mixed-type frame - getattr(float_string_frame_, name)(axis=0) - getattr(float_string_frame_, name)(axis=1) + getattr(float_string_frame, name)(axis=0) + getattr(float_string_frame, name)(axis=1) if has_numeric_only: - getattr(float_string_frame_, name)(axis=0, numeric_only=True) - getattr(float_string_frame_, name)(axis=1, numeric_only=True) - getattr(float_frame_, name)(axis=0, numeric_only=False) - getattr(float_frame_, name)(axis=1, numeric_only=False) + getattr(float_string_frame, name)(axis=0, numeric_only=True) + getattr(float_string_frame, name)(axis=1, numeric_only=True) + getattr(float_frame, name)(axis=0, numeric_only=False) + getattr(float_frame, name)(axis=1, numeric_only=False) # all NA case if has_skipna: - all_na = float_frame_ * np.NaN + all_na = float_frame * np.NaN r0 = getattr(all_na, name)(axis=0) r1 = getattr(all_na, name)(axis=1) if name in ['sum', 'prod']: From 7ac476ec50d3a9b44112c078a61f9455efe93c07 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 25 Sep 2018 18:34:40 +0200 Subject: [PATCH 04/14] Pure copy/paste of _check_stat_op and _check_bool_op --- pandas/tests/frame/test_analytics.py | 280 ++++++++++++++------------- 1 file changed, 141 insertions(+), 139 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index e9f7f24634c98..44b986fbb59fc 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,6 +25,147 @@ import pandas.util._test_decorators as td + def _check_stat_op(self, name, alternative, main_frame, float_frame, + float_string_frame, has_skipna=True, + has_numeric_only=False, check_dtype=True, + check_dates=False, check_less_precise=False, + skipna_alternative=None): + + f = getattr(main_frame, name) + + if check_dates: + df = DataFrame({'b': date_range('1/1/2001', periods=2)}) + _f = getattr(df, name) + result = _f() + assert isinstance(result, Series) + + df['a'] = lrange(len(df)) + result = getattr(df, name)() + assert isinstance(result, Series) + assert len(result) + + if has_skipna: + def wrapper(x): + return alternative(x.values) + + skipna_wrapper = tm._make_skipna_wrapper(alternative, + skipna_alternative) + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + tm.assert_series_equal(result0, main_frame.apply(wrapper), + check_dtype=check_dtype, + check_less_precise=check_less_precise) + # HACK: win32 + tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), + check_dtype=False, + check_less_precise=check_less_precise) + else: + skipna_wrapper = alternative + + result0 = f(axis=0) + result1 = f(axis=1) + tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), + check_dtype=check_dtype, + check_less_precise=check_less_precise) + if name in ['sum', 'prod']: + expected = main_frame.apply(skipna_wrapper, axis=1) + tm.assert_series_equal(result1, expected, check_dtype=False, + check_less_precise=check_less_precise) + + # check dtypes + if check_dtype: + lcd_dtype = main_frame.values.dtype + assert lcd_dtype == result0.dtype + assert lcd_dtype == result1.dtype + + # bad axis + tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) + # make sure works on mixed-type frame + getattr(float_string_frame, name)(axis=0) + getattr(float_string_frame, name)(axis=1) + + if has_numeric_only: + getattr(float_string_frame, name)(axis=0, numeric_only=True) + getattr(float_string_frame, name)(axis=1, numeric_only=True) + getattr(float_frame, name)(axis=0, numeric_only=False) + getattr(float_frame, name)(axis=1, numeric_only=False) + + # all NA case + if has_skipna: + all_na = float_frame * np.NaN + r0 = getattr(all_na, name)(axis=0) + r1 = getattr(all_na, name)(axis=1) + if name in ['sum', 'prod']: + unit = int(name == 'prod') + expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) + tm.assert_series_equal(r0, expected) + expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) + tm.assert_series_equal(r1, expected) + + + def _check_bool_op(self, name, alternative, frame, float_string_frame, + has_skipna=True, has_bool_only=False): + + f = getattr(frame, name) + + if has_skipna: + def skipna_wrapper(x): + nona = x.dropna().values + return alternative(nona) + + def wrapper(x): + return alternative(x.values) + + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + tm.assert_series_equal(result0, frame.apply(wrapper)) + tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), + check_dtype=False) # HACK: win32 + else: + skipna_wrapper = alternative + wrapper = alternative + + result0 = f(axis=0) + result1 = f(axis=1) + tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) + tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), + check_dtype=False) + + # bad axis + pytest.raises(ValueError, f, axis=2) + + # make sure works on mixed-type frame + mixed = float_string_frame + mixed['_bool_'] = np.random.randn(len(mixed)) > 0 + getattr(mixed, name)(axis=0) + getattr(mixed, name)(axis=1) + + class NonzeroFail(object): + + def __nonzero__(self): + raise ValueError + + mixed['_nonzero_fail_'] = NonzeroFail() + + if has_bool_only: + getattr(mixed, name)(axis=0, bool_only=True) + getattr(mixed, name)(axis=1, bool_only=True) + getattr(frame, name)(axis=0, bool_only=False) + getattr(frame, name)(axis=1, bool_only=False) + + # all NA case + if has_skipna: + all_na = frame * np.NaN + r0 = getattr(all_na, name)(axis=0) + r1 = getattr(all_na, name)(axis=1) + if name == 'any': + assert not r0.any() + assert not r1.any() + else: + assert r0.all() + assert r1.all() + + class TestDataFrameAnalytics(): # ---------------------------------------------------------------------= @@ -803,83 +944,6 @@ def alt(x): assert kurt.name is None assert kurt2.name == 'bar' - def _check_stat_op(self, name, alternative, main_frame, float_frame, - float_string_frame, has_skipna=True, - has_numeric_only=False, check_dtype=True, - check_dates=False, check_less_precise=False, - skipna_alternative=None): - - f = getattr(main_frame, name) - - if check_dates: - df = DataFrame({'b': date_range('1/1/2001', periods=2)}) - _f = getattr(df, name) - result = _f() - assert isinstance(result, Series) - - df['a'] = lrange(len(df)) - result = getattr(df, name)() - assert isinstance(result, Series) - assert len(result) - - if has_skipna: - def wrapper(x): - return alternative(x.values) - - skipna_wrapper = tm._make_skipna_wrapper(alternative, - skipna_alternative) - result0 = f(axis=0, skipna=False) - result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, main_frame.apply(wrapper), - check_dtype=check_dtype, - check_less_precise=check_less_precise) - # HACK: win32 - tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), - check_dtype=False, - check_less_precise=check_less_precise) - else: - skipna_wrapper = alternative - - result0 = f(axis=0) - result1 = f(axis=1) - tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), - check_dtype=check_dtype, - check_less_precise=check_less_precise) - if name in ['sum', 'prod']: - expected = main_frame.apply(skipna_wrapper, axis=1) - tm.assert_series_equal(result1, expected, check_dtype=False, - check_less_precise=check_less_precise) - - # check dtypes - if check_dtype: - lcd_dtype = main_frame.values.dtype - assert lcd_dtype == result0.dtype - assert lcd_dtype == result1.dtype - - # bad axis - tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) - # make sure works on mixed-type frame - getattr(float_string_frame, name)(axis=0) - getattr(float_string_frame, name)(axis=1) - - if has_numeric_only: - getattr(float_string_frame, name)(axis=0, numeric_only=True) - getattr(float_string_frame, name)(axis=1, numeric_only=True) - getattr(float_frame, name)(axis=0, numeric_only=False) - getattr(float_frame, name)(axis=1, numeric_only=False) - - # all NA case - if has_skipna: - all_na = float_frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name in ['sum', 'prod']: - unit = int(name == 'prod') - expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) - tm.assert_series_equal(r0, expected) - expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) - tm.assert_series_equal(r1, expected) - @pytest.mark.parametrize("dropna, expected", [ (True, {'A': [12], 'B': [10.0], @@ -1336,68 +1400,6 @@ def test_any_all_level_axis_none_raises(self, method): with tm.assert_raises_regex(ValueError, xpr): getattr(df, method)(axis=None, level='out') - def _check_bool_op(self, name, alternative, frame, float_string_frame, - has_skipna=True, has_bool_only=False): - - f = getattr(frame, name) - - if has_skipna: - def skipna_wrapper(x): - nona = x.dropna().values - return alternative(nona) - - def wrapper(x): - return alternative(x.values) - - result0 = f(axis=0, skipna=False) - result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, frame.apply(wrapper)) - tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), - check_dtype=False) # HACK: win32 - else: - skipna_wrapper = alternative - wrapper = alternative - - result0 = f(axis=0) - result1 = f(axis=1) - tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) - tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), - check_dtype=False) - - # bad axis - pytest.raises(ValueError, f, axis=2) - - # make sure works on mixed-type frame - mixed = float_string_frame - mixed['_bool_'] = np.random.randn(len(mixed)) > 0 - getattr(mixed, name)(axis=0) - getattr(mixed, name)(axis=1) - - class NonzeroFail(object): - - def __nonzero__(self): - raise ValueError - - mixed['_nonzero_fail_'] = NonzeroFail() - - if has_bool_only: - getattr(mixed, name)(axis=0, bool_only=True) - getattr(mixed, name)(axis=1, bool_only=True) - getattr(frame, name)(axis=0, bool_only=False) - getattr(frame, name)(axis=1, bool_only=False) - - # all NA case - if has_skipna: - all_na = frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name == 'any': - assert not r0.any() - assert not r1.any() - else: - assert r0.all() - assert r1.all() - # ---------------------------------------------------------------------- # Isin From e1a8c5a47bdcdd0a414ba24ce0d8bae9ea92dc5b Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 25 Sep 2018 18:35:07 +0200 Subject: [PATCH 05/14] Pure unindent of _check_stat_op and _check_bool_op --- pandas/tests/frame/test_analytics.py | 268 +++++++++++++-------------- 1 file changed, 134 insertions(+), 134 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 44b986fbb59fc..19775fad097f7 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,145 +25,145 @@ import pandas.util._test_decorators as td - def _check_stat_op(self, name, alternative, main_frame, float_frame, - float_string_frame, has_skipna=True, - has_numeric_only=False, check_dtype=True, - check_dates=False, check_less_precise=False, - skipna_alternative=None): - - f = getattr(main_frame, name) - - if check_dates: - df = DataFrame({'b': date_range('1/1/2001', periods=2)}) - _f = getattr(df, name) - result = _f() - assert isinstance(result, Series) - - df['a'] = lrange(len(df)) - result = getattr(df, name)() - assert isinstance(result, Series) - assert len(result) - - if has_skipna: - def wrapper(x): - return alternative(x.values) - - skipna_wrapper = tm._make_skipna_wrapper(alternative, - skipna_alternative) - result0 = f(axis=0, skipna=False) - result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, main_frame.apply(wrapper), - check_dtype=check_dtype, - check_less_precise=check_less_precise) - # HACK: win32 - tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), - check_dtype=False, - check_less_precise=check_less_precise) - else: - skipna_wrapper = alternative +def _check_stat_op(self, name, alternative, main_frame, float_frame, + float_string_frame, has_skipna=True, + has_numeric_only=False, check_dtype=True, + check_dates=False, check_less_precise=False, + skipna_alternative=None): + + f = getattr(main_frame, name) + + if check_dates: + df = DataFrame({'b': date_range('1/1/2001', periods=2)}) + _f = getattr(df, name) + result = _f() + assert isinstance(result, Series) + + df['a'] = lrange(len(df)) + result = getattr(df, name)() + assert isinstance(result, Series) + assert len(result) + + if has_skipna: + def wrapper(x): + return alternative(x.values) - result0 = f(axis=0) - result1 = f(axis=1) - tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), + skipna_wrapper = tm._make_skipna_wrapper(alternative, + skipna_alternative) + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + tm.assert_series_equal(result0, main_frame.apply(wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) + # HACK: win32 + tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), + check_dtype=False, + check_less_precise=check_less_precise) + else: + skipna_wrapper = alternative + + result0 = f(axis=0) + result1 = f(axis=1) + tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), + check_dtype=check_dtype, + check_less_precise=check_less_precise) + if name in ['sum', 'prod']: + expected = main_frame.apply(skipna_wrapper, axis=1) + tm.assert_series_equal(result1, expected, check_dtype=False, + check_less_precise=check_less_precise) + + # check dtypes + if check_dtype: + lcd_dtype = main_frame.values.dtype + assert lcd_dtype == result0.dtype + assert lcd_dtype == result1.dtype + + # bad axis + tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) + # make sure works on mixed-type frame + getattr(float_string_frame, name)(axis=0) + getattr(float_string_frame, name)(axis=1) + + if has_numeric_only: + getattr(float_string_frame, name)(axis=0, numeric_only=True) + getattr(float_string_frame, name)(axis=1, numeric_only=True) + getattr(float_frame, name)(axis=0, numeric_only=False) + getattr(float_frame, name)(axis=1, numeric_only=False) + + # all NA case + if has_skipna: + all_na = float_frame * np.NaN + r0 = getattr(all_na, name)(axis=0) + r1 = getattr(all_na, name)(axis=1) if name in ['sum', 'prod']: - expected = main_frame.apply(skipna_wrapper, axis=1) - tm.assert_series_equal(result1, expected, check_dtype=False, - check_less_precise=check_less_precise) - - # check dtypes - if check_dtype: - lcd_dtype = main_frame.values.dtype - assert lcd_dtype == result0.dtype - assert lcd_dtype == result1.dtype - - # bad axis - tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) - # make sure works on mixed-type frame - getattr(float_string_frame, name)(axis=0) - getattr(float_string_frame, name)(axis=1) - - if has_numeric_only: - getattr(float_string_frame, name)(axis=0, numeric_only=True) - getattr(float_string_frame, name)(axis=1, numeric_only=True) - getattr(float_frame, name)(axis=0, numeric_only=False) - getattr(float_frame, name)(axis=1, numeric_only=False) - - # all NA case - if has_skipna: - all_na = float_frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name in ['sum', 'prod']: - unit = int(name == 'prod') - expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) - tm.assert_series_equal(r0, expected) - expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) - tm.assert_series_equal(r1, expected) - - - def _check_bool_op(self, name, alternative, frame, float_string_frame, - has_skipna=True, has_bool_only=False): - - f = getattr(frame, name) - - if has_skipna: - def skipna_wrapper(x): - nona = x.dropna().values - return alternative(nona) - - def wrapper(x): - return alternative(x.values) - - result0 = f(axis=0, skipna=False) - result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, frame.apply(wrapper)) - tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), - check_dtype=False) # HACK: win32 + unit = int(name == 'prod') + expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) + tm.assert_series_equal(r0, expected) + expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) + tm.assert_series_equal(r1, expected) + + +def _check_bool_op(self, name, alternative, frame, float_string_frame, + has_skipna=True, has_bool_only=False): + + f = getattr(frame, name) + + if has_skipna: + def skipna_wrapper(x): + nona = x.dropna().values + return alternative(nona) + + def wrapper(x): + return alternative(x.values) + + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + tm.assert_series_equal(result0, frame.apply(wrapper)) + tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), + check_dtype=False) # HACK: win32 + else: + skipna_wrapper = alternative + wrapper = alternative + + result0 = f(axis=0) + result1 = f(axis=1) + tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) + tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), + check_dtype=False) + + # bad axis + pytest.raises(ValueError, f, axis=2) + + # make sure works on mixed-type frame + mixed = float_string_frame + mixed['_bool_'] = np.random.randn(len(mixed)) > 0 + getattr(mixed, name)(axis=0) + getattr(mixed, name)(axis=1) + + class NonzeroFail(object): + + def __nonzero__(self): + raise ValueError + + mixed['_nonzero_fail_'] = NonzeroFail() + + if has_bool_only: + getattr(mixed, name)(axis=0, bool_only=True) + getattr(mixed, name)(axis=1, bool_only=True) + getattr(frame, name)(axis=0, bool_only=False) + getattr(frame, name)(axis=1, bool_only=False) + + # all NA case + if has_skipna: + all_na = frame * np.NaN + r0 = getattr(all_na, name)(axis=0) + r1 = getattr(all_na, name)(axis=1) + if name == 'any': + assert not r0.any() + assert not r1.any() else: - skipna_wrapper = alternative - wrapper = alternative - - result0 = f(axis=0) - result1 = f(axis=1) - tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) - tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), - check_dtype=False) - - # bad axis - pytest.raises(ValueError, f, axis=2) - - # make sure works on mixed-type frame - mixed = float_string_frame - mixed['_bool_'] = np.random.randn(len(mixed)) > 0 - getattr(mixed, name)(axis=0) - getattr(mixed, name)(axis=1) - - class NonzeroFail(object): - - def __nonzero__(self): - raise ValueError - - mixed['_nonzero_fail_'] = NonzeroFail() - - if has_bool_only: - getattr(mixed, name)(axis=0, bool_only=True) - getattr(mixed, name)(axis=1, bool_only=True) - getattr(frame, name)(axis=0, bool_only=False) - getattr(frame, name)(axis=1, bool_only=False) - - # all NA case - if has_skipna: - all_na = frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name == 'any': - assert not r0.any() - assert not r1.any() - else: - assert r0.all() - assert r1.all() + assert r0.all() + assert r1.all() class TestDataFrameAnalytics(): From 6c4a7027b0bfe64f72a6ac4831f94eee15406567 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 25 Sep 2018 18:41:42 +0200 Subject: [PATCH 06/14] Make _check_stat_op and _check_bool_op run --- pandas/tests/frame/test_analytics.py | 106 +++++++++++++-------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 19775fad097f7..b0b9f2815cbb9 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,7 +25,7 @@ import pandas.util._test_decorators as td -def _check_stat_op(self, name, alternative, main_frame, float_frame, +def _check_stat_op(name, alternative, main_frame, float_frame, float_string_frame, has_skipna=True, has_numeric_only=False, check_dtype=True, check_dates=False, check_less_precise=False, @@ -103,7 +103,7 @@ def wrapper(x): tm.assert_series_equal(r1, expected) -def _check_bool_op(self, name, alternative, frame, float_string_frame, +def _check_bool_op(name, alternative, frame, float_string_frame, has_skipna=True, has_bool_only=False): f = getattr(frame, name) @@ -596,10 +596,10 @@ def test_reduce_mixed_frame(self): def test_count(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: notna(s).sum() - self._check_stat_op('count', f, float_frame_with_na, float_frame, - float_string_frame, has_skipna=False, - has_numeric_only=True, check_dtype=False, - check_dates=True) + _check_stat_op('count', f, float_frame_with_na, float_frame, + float_string_frame, has_skipna=False, + has_numeric_only=True, check_dtype=False, + check_dates=True) # corner case frame = DataFrame() @@ -628,9 +628,9 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): def test_nunique(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: len(algorithms.unique1d(s.dropna())) - self._check_stat_op('nunique', f, float_frame_with_na, - float_frame, float_string_frame, has_skipna=False, - check_dtype=False, check_dates=True) + _check_stat_op('nunique', f, float_frame_with_na, + float_frame, float_string_frame, has_skipna=False, + check_dtype=False, check_dates=True) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], @@ -644,15 +644,15 @@ def test_nunique(self, float_frame_with_na, float_frame, def test_sum(self, float_frame_with_na, mixed_float_frame, float_frame, float_string_frame): - self._check_stat_op('sum', np.sum, float_frame_with_na, float_frame, - float_string_frame, has_numeric_only=True, - skipna_alternative=np.nansum) + _check_stat_op('sum', np.sum, float_frame_with_na, float_frame, + float_string_frame, has_numeric_only=True, + skipna_alternative=np.nansum) # mixed types (with upcasting happening) - self._check_stat_op('sum', np.sum, - mixed_float_frame.astype('float32'), float_frame, - float_string_frame, has_numeric_only=True, - check_dtype=False, check_less_precise=True) + _check_stat_op('sum', np.sum, + mixed_float_frame.astype('float32'), float_frame, + float_string_frame, has_numeric_only=True, + check_dtype=False, check_less_precise=True) @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']) @@ -679,13 +679,13 @@ def test_stat_operators_attempt_obj_array(self, method): tm.assert_series_equal(result, expected) def test_mean(self, float_frame_with_na, float_frame, float_string_frame): - self._check_stat_op('mean', np.mean, float_frame_with_na, - float_frame, float_string_frame, check_dates=True) + _check_stat_op('mean', np.mean, float_frame_with_na, + float_frame, float_string_frame, check_dates=True) def test_product(self, float_frame_with_na, float_frame, float_string_frame): - self._check_stat_op('product', np.prod, float_frame_with_na, - float_frame, float_string_frame) + _check_stat_op('product', np.prod, float_frame_with_na, + float_frame, float_string_frame) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") @@ -696,18 +696,18 @@ def wrapper(x): return np.nan return np.median(x) - self._check_stat_op('median', wrapper, float_frame_with_na, - float_frame, float_string_frame, check_dates=True) + _check_stat_op('median', wrapper, float_frame_with_na, + float_frame, float_string_frame, check_dates=True) def test_min(self, float_frame_with_na, int_frame, float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - self._check_stat_op('min', np.min, float_frame_with_na, - float_frame, float_string_frame, - check_dates=True) - self._check_stat_op('min', np.min, int_frame, float_frame, - float_string_frame) + _check_stat_op('min', np.min, float_frame_with_na, + float_frame, float_string_frame, + check_dates=True) + _check_stat_op('min', np.min, int_frame, float_frame, + float_string_frame) def test_cummin(self, datetime_frame): datetime_frame.loc[5:10, 0] = nan @@ -759,26 +759,26 @@ def test_max(self, float_frame_with_na, int_frame, float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - self._check_stat_op('max', np.max, float_frame_with_na, - float_frame, float_string_frame, - check_dates=True) - self._check_stat_op('max', np.max, int_frame, float_frame, - float_string_frame) + _check_stat_op('max', np.max, float_frame_with_na, + float_frame, float_string_frame, + check_dates=True) + _check_stat_op('max', np.max, int_frame, float_frame, + float_string_frame) def test_mad(self, float_frame_with_na, float_frame, float_string_frame): f = lambda x: np.abs(x - x.mean()).mean() - self._check_stat_op('mad', f, float_frame_with_na, float_frame, - float_string_frame) + _check_stat_op('mad', f, float_frame_with_na, float_frame, + float_string_frame) def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, float_string_frame): alt = lambda x: np.var(x, ddof=1) - self._check_stat_op('var', alt, float_frame_with_na, float_frame, - float_string_frame) + _check_stat_op('var', alt, float_frame_with_na, float_frame, + float_string_frame) alt = lambda x: np.std(x, ddof=1) - self._check_stat_op('std', alt, float_frame_with_na, float_frame, - float_string_frame) + _check_stat_op('std', alt, float_frame_with_na, float_frame, + float_string_frame) result = datetime_frame.std(ddof=4) expected = datetime_frame.apply(lambda x: x.std(ddof=4)) @@ -892,8 +892,8 @@ def test_cumprod(self, datetime_frame): def test_sem(self, float_frame_with_na, datetime_frame, float_frame, float_string_frame): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) - self._check_stat_op('sem', alt, float_frame_with_na, - float_frame, float_string_frame) + _check_stat_op('sem', alt, float_frame_with_na, + float_frame, float_string_frame) result = datetime_frame.sem(ddof=4) expected = datetime_frame.apply( @@ -917,8 +917,8 @@ def alt(x): return np.nan return skew(x, bias=False) - self._check_stat_op('skew', alt, float_frame_with_na, - float_frame, float_string_frame) + _check_stat_op('skew', alt, float_frame_with_na, + float_frame, float_string_frame) @td.skip_if_no_scipy def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): @@ -929,8 +929,8 @@ def alt(x): return np.nan return kurtosis(x, bias=False) - self._check_stat_op('kurt', alt, float_frame_with_na, - float_frame, float_string_frame) + _check_stat_op('kurt', alt, float_frame_with_na, + float_frame, float_string_frame) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], @@ -1205,9 +1205,9 @@ def wrapper(x): return np.nan return np.median(x) - self._check_stat_op('median', wrapper, int_frame, float_frame, - float_string_frame, check_dtype=False, - check_dates=True) + _check_stat_op('median', wrapper, int_frame, float_frame, + float_string_frame, check_dtype=False, + check_dates=True) # Miscellanea @@ -1263,12 +1263,12 @@ def test_idxmax(self, float_frame, int_frame): # Logical reductions def test_any_all(self, bool_frame_with_na, float_string_frame): - self._check_bool_op('any', np.any, bool_frame_with_na, - float_string_frame, has_skipna=True, - has_bool_only=True) - self._check_bool_op('all', np.all, bool_frame_with_na, - float_string_frame, has_skipna=True, - has_bool_only=True) + _check_bool_op('any', np.any, bool_frame_with_na, + float_string_frame, has_skipna=True, + has_bool_only=True) + _check_bool_op('all', np.all, bool_frame_with_na, + float_string_frame, has_skipna=True, + has_bool_only=True) def test_any_all_extra(self): df = DataFrame({ From 98f324309fd65c2a67bd8de409b9c231b398ff51 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 25 Sep 2018 18:42:56 +0200 Subject: [PATCH 07/14] Correctly group tests within _check_[stat/bool]_op --- pandas/tests/frame/test_analytics.py | 43 ++++++++++++++-------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index b0b9f2815cbb9..364aabfbcdc5a 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -80,15 +80,6 @@ def wrapper(x): # bad axis tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) - # make sure works on mixed-type frame - getattr(float_string_frame, name)(axis=0) - getattr(float_string_frame, name)(axis=1) - - if has_numeric_only: - getattr(float_string_frame, name)(axis=0, numeric_only=True) - getattr(float_string_frame, name)(axis=1, numeric_only=True) - getattr(float_frame, name)(axis=0, numeric_only=False) - getattr(float_frame, name)(axis=1, numeric_only=False) # all NA case if has_skipna: @@ -102,6 +93,16 @@ def wrapper(x): expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) tm.assert_series_equal(r1, expected) + # make sure works on mixed-type frame + getattr(float_string_frame, name)(axis=0) + getattr(float_string_frame, name)(axis=1) + + if has_numeric_only: + getattr(float_string_frame, name)(axis=0, numeric_only=True) + getattr(float_string_frame, name)(axis=1, numeric_only=True) + getattr(float_frame, name)(axis=0, numeric_only=False) + getattr(float_frame, name)(axis=1, numeric_only=False) + def _check_bool_op(name, alternative, frame, float_string_frame, has_skipna=True, has_bool_only=False): @@ -134,6 +135,18 @@ def wrapper(x): # bad axis pytest.raises(ValueError, f, axis=2) + # all NA case + if has_skipna: + all_na = frame * np.NaN + r0 = getattr(all_na, name)(axis=0) + r1 = getattr(all_na, name)(axis=1) + if name == 'any': + assert not r0.any() + assert not r1.any() + else: + assert r0.all() + assert r1.all() + # make sure works on mixed-type frame mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0 @@ -153,18 +166,6 @@ def __nonzero__(self): getattr(frame, name)(axis=0, bool_only=False) getattr(frame, name)(axis=1, bool_only=False) - # all NA case - if has_skipna: - all_na = frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name == 'any': - assert not r0.any() - assert not r1.any() - else: - assert r0.all() - assert r1.all() - class TestDataFrameAnalytics(): From b043bb4fe6099af5525c0f2ccdc0ceae535eefcb Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 25 Sep 2018 18:44:16 +0200 Subject: [PATCH 08/14] Consistent naming of parameters --- pandas/tests/frame/test_analytics.py | 62 ++++++++++++++-------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 364aabfbcdc5a..7bd7250fdb975 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,22 +25,22 @@ import pandas.util._test_decorators as td -def _check_stat_op(name, alternative, main_frame, float_frame, +def _check_stat_op(opname, alternative, main_frame, float_frame, float_string_frame, has_skipna=True, has_numeric_only=False, check_dtype=True, check_dates=False, check_less_precise=False, skipna_alternative=None): - f = getattr(main_frame, name) + f = getattr(main_frame, opname) if check_dates: df = DataFrame({'b': date_range('1/1/2001', periods=2)}) - _f = getattr(df, name) + _f = getattr(df, opname) result = _f() assert isinstance(result, Series) df['a'] = lrange(len(df)) - result = getattr(df, name)() + result = getattr(df, opname)() assert isinstance(result, Series) assert len(result) @@ -67,7 +67,7 @@ def wrapper(x): tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) - if name in ['sum', 'prod']: + if opname in ['sum', 'prod']: expected = main_frame.apply(skipna_wrapper, axis=1) tm.assert_series_equal(result1, expected, check_dtype=False, check_less_precise=check_less_precise) @@ -84,30 +84,30 @@ def wrapper(x): # all NA case if has_skipna: all_na = float_frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name in ['sum', 'prod']: - unit = int(name == 'prod') + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname in ['sum', 'prod']: + unit = int(opname == 'prod') expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) tm.assert_series_equal(r0, expected) expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) tm.assert_series_equal(r1, expected) # make sure works on mixed-type frame - getattr(float_string_frame, name)(axis=0) - getattr(float_string_frame, name)(axis=1) + getattr(float_string_frame, opname)(axis=0) + getattr(float_string_frame, opname)(axis=1) if has_numeric_only: - getattr(float_string_frame, name)(axis=0, numeric_only=True) - getattr(float_string_frame, name)(axis=1, numeric_only=True) - getattr(float_frame, name)(axis=0, numeric_only=False) - getattr(float_frame, name)(axis=1, numeric_only=False) + getattr(float_string_frame, opname)(axis=0, numeric_only=True) + getattr(float_string_frame, opname)(axis=1, numeric_only=True) + getattr(float_frame, opname)(axis=0, numeric_only=False) + getattr(float_frame, opname)(axis=1, numeric_only=False) -def _check_bool_op(name, alternative, frame, float_string_frame, +def _check_bool_op(opname, alternative, main_frame, float_string_frame, has_skipna=True, has_bool_only=False): - f = getattr(frame, name) + f = getattr(main_frame, opname) if has_skipna: def skipna_wrapper(x): @@ -119,8 +119,8 @@ def wrapper(x): result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, frame.apply(wrapper)) - tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), + tm.assert_series_equal(result0, main_frame.apply(wrapper)) + tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), check_dtype=False) # HACK: win32 else: skipna_wrapper = alternative @@ -128,8 +128,8 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) - tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) - tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), + tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper)) + tm.assert_series_equal(result1, main_frame.apply(skipna_wrapper, axis=1), check_dtype=False) # bad axis @@ -137,10 +137,10 @@ def wrapper(x): # all NA case if has_skipna: - all_na = frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name == 'any': + all_na = main_frame * np.NaN + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname == 'any': assert not r0.any() assert not r1.any() else: @@ -150,8 +150,8 @@ def wrapper(x): # make sure works on mixed-type frame mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0 - getattr(mixed, name)(axis=0) - getattr(mixed, name)(axis=1) + getattr(mixed, opname)(axis=0) + getattr(mixed, opname)(axis=1) class NonzeroFail(object): @@ -161,10 +161,10 @@ def __nonzero__(self): mixed['_nonzero_fail_'] = NonzeroFail() if has_bool_only: - getattr(mixed, name)(axis=0, bool_only=True) - getattr(mixed, name)(axis=1, bool_only=True) - getattr(frame, name)(axis=0, bool_only=False) - getattr(frame, name)(axis=1, bool_only=False) + getattr(mixed, opname)(axis=0, bool_only=True) + getattr(mixed, opname)(axis=1, bool_only=True) + getattr(main_frame, opname)(axis=0, bool_only=False) + getattr(main_frame, opname)(axis=1, bool_only=False) class TestDataFrameAnalytics(): From 4a2adeb602594050e688699245d6f71f462f43ff Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 25 Sep 2018 18:52:45 +0200 Subject: [PATCH 09/14] Break up _check_[stat/bool]_op --- pandas/tests/frame/test_analytics.py | 133 ++++++++++++++------------- 1 file changed, 69 insertions(+), 64 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 7bd7250fdb975..e980ca3e08ddd 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,11 +25,9 @@ import pandas.util._test_decorators as td -def _check_stat_op(opname, alternative, main_frame, float_frame, - float_string_frame, has_skipna=True, - has_numeric_only=False, check_dtype=True, - check_dates=False, check_less_precise=False, - skipna_alternative=None): +def assert_stat_op_calc(opname, alternative, main_frame, has_skipna=True, + check_dtype=True, check_dates=False, + check_less_precise=False, skipna_alternative=None): f = getattr(main_frame, opname) @@ -67,6 +65,7 @@ def wrapper(x): tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) + if opname in ['sum', 'prod']: expected = main_frame.apply(skipna_wrapper, axis=1) tm.assert_series_equal(result1, expected, check_dtype=False, @@ -83,7 +82,7 @@ def wrapper(x): # all NA case if has_skipna: - all_na = float_frame * np.NaN + all_na = main_frame * np.NaN r0 = getattr(all_na, opname)(axis=0) r1 = getattr(all_na, opname)(axis=1) if opname in ['sum', 'prod']: @@ -93,6 +92,10 @@ def wrapper(x): expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) tm.assert_series_equal(r1, expected) + +def assert_stat_op_api(opname, float_frame, float_string_frame, + has_numeric_only=False): + # make sure works on mixed-type frame getattr(float_string_frame, opname)(axis=0) getattr(float_string_frame, opname)(axis=1) @@ -104,8 +107,7 @@ def wrapper(x): getattr(float_frame, opname)(axis=1, numeric_only=False) -def _check_bool_op(opname, alternative, main_frame, float_string_frame, - has_skipna=True, has_bool_only=False): +def assert_bool_op_calc(opname, alternative, main_frame, has_skipna=True): f = getattr(main_frame, opname) @@ -119,6 +121,7 @@ def wrapper(x): result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) + tm.assert_series_equal(result0, main_frame.apply(wrapper)) tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), check_dtype=False) # HACK: win32 @@ -128,6 +131,7 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) + tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper)) tm.assert_series_equal(result1, main_frame.apply(skipna_wrapper, axis=1), check_dtype=False) @@ -147,7 +151,10 @@ def wrapper(x): assert r0.all() assert r1.all() - # make sure works on mixed-type frame + +def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, + has_bool_only=False): + # make sure op works on mixed-type frame mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0 getattr(mixed, opname)(axis=0) @@ -163,8 +170,8 @@ def __nonzero__(self): if has_bool_only: getattr(mixed, opname)(axis=0, bool_only=True) getattr(mixed, opname)(axis=1, bool_only=True) - getattr(main_frame, opname)(axis=0, bool_only=False) - getattr(main_frame, opname)(axis=1, bool_only=False) + getattr(bool_frame_with_na, opname)(axis=0, bool_only=False) + getattr(bool_frame_with_na, opname)(axis=1, bool_only=False) class TestDataFrameAnalytics(): @@ -597,10 +604,10 @@ def test_reduce_mixed_frame(self): def test_count(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: notna(s).sum() - _check_stat_op('count', f, float_frame_with_na, float_frame, - float_string_frame, has_skipna=False, - has_numeric_only=True, check_dtype=False, - check_dates=True) + assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, + check_dtype=False, check_dates=True) + assert_stat_op_api('count', float_frame, float_string_frame, + has_numeric_only=True) # corner case frame = DataFrame() @@ -629,9 +636,10 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): def test_nunique(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: len(algorithms.unique1d(s.dropna())) - _check_stat_op('nunique', f, float_frame_with_na, - float_frame, float_string_frame, has_skipna=False, - check_dtype=False, check_dates=True) + assert_stat_op_calc('nunique', f, float_frame_with_na, + has_skipna=False, check_dtype=False, + check_dates=True) + assert_stat_op_api('nunique', float_frame, float_string_frame) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], @@ -645,15 +653,13 @@ def test_nunique(self, float_frame_with_na, float_frame, def test_sum(self, float_frame_with_na, mixed_float_frame, float_frame, float_string_frame): - _check_stat_op('sum', np.sum, float_frame_with_na, float_frame, - float_string_frame, has_numeric_only=True, - skipna_alternative=np.nansum) - + assert_stat_op_api('sum', float_frame, float_string_frame, + has_numeric_only=True) + assert_stat_op_calc('sum', np.sum, float_frame_with_na, + skipna_alternative=np.nansum) # mixed types (with upcasting happening) - _check_stat_op('sum', np.sum, - mixed_float_frame.astype('float32'), float_frame, - float_string_frame, has_numeric_only=True, - check_dtype=False, check_less_precise=True) + assert_stat_op_calc('sum', np.sum, mixed_float_frame.astype('float32'), + check_dtype=False, check_less_precise=True) @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']) @@ -680,13 +686,14 @@ def test_stat_operators_attempt_obj_array(self, method): tm.assert_series_equal(result, expected) def test_mean(self, float_frame_with_na, float_frame, float_string_frame): - _check_stat_op('mean', np.mean, float_frame_with_na, - float_frame, float_string_frame, check_dates=True) + assert_stat_op_calc('mean', np.mean, float_frame_with_na, + check_dates=True) + assert_stat_op_api('mean', float_frame, float_string_frame) def test_product(self, float_frame_with_na, float_frame, float_string_frame): - _check_stat_op('product', np.prod, float_frame_with_na, - float_frame, float_string_frame) + assert_stat_op_calc('product', np.prod, float_frame_with_na) + assert_stat_op_api('product', float_frame, float_string_frame) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") @@ -697,18 +704,18 @@ def wrapper(x): return np.nan return np.median(x) - _check_stat_op('median', wrapper, float_frame_with_na, - float_frame, float_string_frame, check_dates=True) + assert_stat_op_calc('median', wrapper, float_frame_with_na, + check_dates=True) + assert_stat_op_api('median', float_frame, float_string_frame) def test_min(self, float_frame_with_na, int_frame, float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - _check_stat_op('min', np.min, float_frame_with_na, - float_frame, float_string_frame, - check_dates=True) - _check_stat_op('min', np.min, int_frame, float_frame, - float_string_frame) + assert_stat_op_calc('min', np.min, float_frame_with_na, + check_dates=True) + assert_stat_op_calc('min', np.min, int_frame) + assert_stat_op_api('min', float_frame, float_string_frame) def test_cummin(self, datetime_frame): datetime_frame.loc[5:10, 0] = nan @@ -760,26 +767,25 @@ def test_max(self, float_frame_with_na, int_frame, float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - _check_stat_op('max', np.max, float_frame_with_na, - float_frame, float_string_frame, - check_dates=True) - _check_stat_op('max', np.max, int_frame, float_frame, - float_string_frame) + assert_stat_op_calc('max', np.max, float_frame_with_na, + check_dates=True) + assert_stat_op_calc('max', np.max, int_frame) + assert_stat_op_api('max', float_frame, float_string_frame) def test_mad(self, float_frame_with_na, float_frame, float_string_frame): f = lambda x: np.abs(x - x.mean()).mean() - _check_stat_op('mad', f, float_frame_with_na, float_frame, - float_string_frame) + assert_stat_op_calc('mad', f, float_frame_with_na) + assert_stat_op_api('mad', float_frame, float_string_frame) def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, float_string_frame): alt = lambda x: np.var(x, ddof=1) - _check_stat_op('var', alt, float_frame_with_na, float_frame, - float_string_frame) + assert_stat_op_calc('var', alt, float_frame_with_na) + assert_stat_op_api('var', float_frame, float_string_frame) alt = lambda x: np.std(x, ddof=1) - _check_stat_op('std', alt, float_frame_with_na, float_frame, - float_string_frame) + assert_stat_op_calc('std', alt, float_frame_with_na) + assert_stat_op_api('std', float_frame, float_string_frame) result = datetime_frame.std(ddof=4) expected = datetime_frame.apply(lambda x: x.std(ddof=4)) @@ -893,8 +899,8 @@ def test_cumprod(self, datetime_frame): def test_sem(self, float_frame_with_na, datetime_frame, float_frame, float_string_frame): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) - _check_stat_op('sem', alt, float_frame_with_na, - float_frame, float_string_frame) + assert_stat_op_calc('sem', alt, float_frame_with_na) + assert_stat_op_api('sem', float_frame, float_string_frame) result = datetime_frame.sem(ddof=4) expected = datetime_frame.apply( @@ -918,8 +924,8 @@ def alt(x): return np.nan return skew(x, bias=False) - _check_stat_op('skew', alt, float_frame_with_na, - float_frame, float_string_frame) + assert_stat_op_calc('skew', alt, float_frame_with_na) + assert_stat_op_api('skew', float_frame, float_string_frame) @td.skip_if_no_scipy def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): @@ -930,8 +936,8 @@ def alt(x): return np.nan return kurtosis(x, bias=False) - _check_stat_op('kurt', alt, float_frame_with_na, - float_frame, float_string_frame) + assert_stat_op_calc('kurt', alt, float_frame_with_na) + assert_stat_op_api('kurt', float_frame, float_string_frame) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], @@ -1206,9 +1212,9 @@ def wrapper(x): return np.nan return np.median(x) - _check_stat_op('median', wrapper, int_frame, float_frame, - float_string_frame, check_dtype=False, - check_dates=True) + assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, + check_dates=True) + assert_stat_op_api('median', float_frame, float_string_frame) # Miscellanea @@ -1263,13 +1269,12 @@ def test_idxmax(self, float_frame, int_frame): # ---------------------------------------------------------------------- # Logical reductions - def test_any_all(self, bool_frame_with_na, float_string_frame): - _check_bool_op('any', np.any, bool_frame_with_na, - float_string_frame, has_skipna=True, - has_bool_only=True) - _check_bool_op('all', np.all, bool_frame_with_na, - float_string_frame, has_skipna=True, - has_bool_only=True) + @pytest.mark.parametrize('opname', ['any', 'all']) + def test_any_all(self, opname, bool_frame_with_na, float_string_frame): + assert_bool_op_calc(opname, getattr(np, opname), bool_frame_with_na, + has_skipna=True) + assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, + has_bool_only=True) def test_any_all_extra(self): df = DataFrame({ From 56020d7ee6d74e5f809456b6b717cdda68feb013 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 25 Sep 2018 18:53:59 +0200 Subject: [PATCH 10/14] Final touches --- pandas/tests/frame/test_analytics.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index e980ca3e08ddd..c4c4142b357ab 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -33,8 +33,7 @@ def assert_stat_op_calc(opname, alternative, main_frame, has_skipna=True, if check_dates: df = DataFrame({'b': date_range('1/1/2001', periods=2)}) - _f = getattr(df, opname) - result = _f() + result = getattr(df, opname)() assert isinstance(result, Series) df['a'] = lrange(len(df)) @@ -86,7 +85,7 @@ def wrapper(x): r0 = getattr(all_na, opname)(axis=0) r1 = getattr(all_na, opname)(axis=1) if opname in ['sum', 'prod']: - unit = int(opname == 'prod') + unit = 1 if opname == 'prod' else 0 # result for empty sum/prod expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) tm.assert_series_equal(r0, expected) expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) @@ -137,7 +136,7 @@ def wrapper(x): check_dtype=False) # bad axis - pytest.raises(ValueError, f, axis=2) + tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) # all NA case if has_skipna: @@ -156,7 +155,7 @@ def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, has_bool_only=False): # make sure op works on mixed-type frame mixed = float_string_frame - mixed['_bool_'] = np.random.randn(len(mixed)) > 0 + mixed['_bool_'] = np.random.randn(len(mixed)) > 0.5 getattr(mixed, opname)(axis=0) getattr(mixed, opname)(axis=1) From 7a56cfbcfa267509fd8139a3a62c8a7226cc4775 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 5 Oct 2018 18:34:45 +0200 Subject: [PATCH 11/14] Revert "Final touches" This reverts commit 56020d7ee6d74e5f809456b6b717cdda68feb013. --- pandas/tests/frame/test_analytics.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index c4c4142b357ab..e980ca3e08ddd 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -33,7 +33,8 @@ def assert_stat_op_calc(opname, alternative, main_frame, has_skipna=True, if check_dates: df = DataFrame({'b': date_range('1/1/2001', periods=2)}) - result = getattr(df, opname)() + _f = getattr(df, opname) + result = _f() assert isinstance(result, Series) df['a'] = lrange(len(df)) @@ -85,7 +86,7 @@ def wrapper(x): r0 = getattr(all_na, opname)(axis=0) r1 = getattr(all_na, opname)(axis=1) if opname in ['sum', 'prod']: - unit = 1 if opname == 'prod' else 0 # result for empty sum/prod + unit = int(opname == 'prod') expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) tm.assert_series_equal(r0, expected) expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) @@ -136,7 +137,7 @@ def wrapper(x): check_dtype=False) # bad axis - tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) + pytest.raises(ValueError, f, axis=2) # all NA case if has_skipna: @@ -155,7 +156,7 @@ def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, has_bool_only=False): # make sure op works on mixed-type frame mixed = float_string_frame - mixed['_bool_'] = np.random.randn(len(mixed)) > 0.5 + mixed['_bool_'] = np.random.randn(len(mixed)) > 0 getattr(mixed, opname)(axis=0) getattr(mixed, opname)(axis=1) From e197fe753889cf2fae332db2c13885bfbcc4f843 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 5 Oct 2018 18:34:47 +0200 Subject: [PATCH 12/14] Revert "Break up _check_[stat/bool]_op" This reverts commit 4a2adeb602594050e688699245d6f71f462f43ff. --- pandas/tests/frame/test_analytics.py | 133 +++++++++++++-------------- 1 file changed, 64 insertions(+), 69 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index e980ca3e08ddd..7bd7250fdb975 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,9 +25,11 @@ import pandas.util._test_decorators as td -def assert_stat_op_calc(opname, alternative, main_frame, has_skipna=True, - check_dtype=True, check_dates=False, - check_less_precise=False, skipna_alternative=None): +def _check_stat_op(opname, alternative, main_frame, float_frame, + float_string_frame, has_skipna=True, + has_numeric_only=False, check_dtype=True, + check_dates=False, check_less_precise=False, + skipna_alternative=None): f = getattr(main_frame, opname) @@ -65,7 +67,6 @@ def wrapper(x): tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) - if opname in ['sum', 'prod']: expected = main_frame.apply(skipna_wrapper, axis=1) tm.assert_series_equal(result1, expected, check_dtype=False, @@ -82,7 +83,7 @@ def wrapper(x): # all NA case if has_skipna: - all_na = main_frame * np.NaN + all_na = float_frame * np.NaN r0 = getattr(all_na, opname)(axis=0) r1 = getattr(all_na, opname)(axis=1) if opname in ['sum', 'prod']: @@ -92,10 +93,6 @@ def wrapper(x): expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) tm.assert_series_equal(r1, expected) - -def assert_stat_op_api(opname, float_frame, float_string_frame, - has_numeric_only=False): - # make sure works on mixed-type frame getattr(float_string_frame, opname)(axis=0) getattr(float_string_frame, opname)(axis=1) @@ -107,7 +104,8 @@ def assert_stat_op_api(opname, float_frame, float_string_frame, getattr(float_frame, opname)(axis=1, numeric_only=False) -def assert_bool_op_calc(opname, alternative, main_frame, has_skipna=True): +def _check_bool_op(opname, alternative, main_frame, float_string_frame, + has_skipna=True, has_bool_only=False): f = getattr(main_frame, opname) @@ -121,7 +119,6 @@ def wrapper(x): result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, main_frame.apply(wrapper)) tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), check_dtype=False) # HACK: win32 @@ -131,7 +128,6 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) - tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper)) tm.assert_series_equal(result1, main_frame.apply(skipna_wrapper, axis=1), check_dtype=False) @@ -151,10 +147,7 @@ def wrapper(x): assert r0.all() assert r1.all() - -def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, - has_bool_only=False): - # make sure op works on mixed-type frame + # make sure works on mixed-type frame mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0 getattr(mixed, opname)(axis=0) @@ -170,8 +163,8 @@ def __nonzero__(self): if has_bool_only: getattr(mixed, opname)(axis=0, bool_only=True) getattr(mixed, opname)(axis=1, bool_only=True) - getattr(bool_frame_with_na, opname)(axis=0, bool_only=False) - getattr(bool_frame_with_na, opname)(axis=1, bool_only=False) + getattr(main_frame, opname)(axis=0, bool_only=False) + getattr(main_frame, opname)(axis=1, bool_only=False) class TestDataFrameAnalytics(): @@ -604,10 +597,10 @@ def test_reduce_mixed_frame(self): def test_count(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: notna(s).sum() - assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, - check_dtype=False, check_dates=True) - assert_stat_op_api('count', float_frame, float_string_frame, - has_numeric_only=True) + _check_stat_op('count', f, float_frame_with_na, float_frame, + float_string_frame, has_skipna=False, + has_numeric_only=True, check_dtype=False, + check_dates=True) # corner case frame = DataFrame() @@ -636,10 +629,9 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): def test_nunique(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: len(algorithms.unique1d(s.dropna())) - assert_stat_op_calc('nunique', f, float_frame_with_na, - has_skipna=False, check_dtype=False, - check_dates=True) - assert_stat_op_api('nunique', float_frame, float_string_frame) + _check_stat_op('nunique', f, float_frame_with_na, + float_frame, float_string_frame, has_skipna=False, + check_dtype=False, check_dates=True) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], @@ -653,13 +645,15 @@ def test_nunique(self, float_frame_with_na, float_frame, def test_sum(self, float_frame_with_na, mixed_float_frame, float_frame, float_string_frame): - assert_stat_op_api('sum', float_frame, float_string_frame, - has_numeric_only=True) - assert_stat_op_calc('sum', np.sum, float_frame_with_na, - skipna_alternative=np.nansum) + _check_stat_op('sum', np.sum, float_frame_with_na, float_frame, + float_string_frame, has_numeric_only=True, + skipna_alternative=np.nansum) + # mixed types (with upcasting happening) - assert_stat_op_calc('sum', np.sum, mixed_float_frame.astype('float32'), - check_dtype=False, check_less_precise=True) + _check_stat_op('sum', np.sum, + mixed_float_frame.astype('float32'), float_frame, + float_string_frame, has_numeric_only=True, + check_dtype=False, check_less_precise=True) @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']) @@ -686,14 +680,13 @@ def test_stat_operators_attempt_obj_array(self, method): tm.assert_series_equal(result, expected) def test_mean(self, float_frame_with_na, float_frame, float_string_frame): - assert_stat_op_calc('mean', np.mean, float_frame_with_na, - check_dates=True) - assert_stat_op_api('mean', float_frame, float_string_frame) + _check_stat_op('mean', np.mean, float_frame_with_na, + float_frame, float_string_frame, check_dates=True) def test_product(self, float_frame_with_na, float_frame, float_string_frame): - assert_stat_op_calc('product', np.prod, float_frame_with_na) - assert_stat_op_api('product', float_frame, float_string_frame) + _check_stat_op('product', np.prod, float_frame_with_na, + float_frame, float_string_frame) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") @@ -704,18 +697,18 @@ def wrapper(x): return np.nan return np.median(x) - assert_stat_op_calc('median', wrapper, float_frame_with_na, - check_dates=True) - assert_stat_op_api('median', float_frame, float_string_frame) + _check_stat_op('median', wrapper, float_frame_with_na, + float_frame, float_string_frame, check_dates=True) def test_min(self, float_frame_with_na, int_frame, float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - assert_stat_op_calc('min', np.min, float_frame_with_na, - check_dates=True) - assert_stat_op_calc('min', np.min, int_frame) - assert_stat_op_api('min', float_frame, float_string_frame) + _check_stat_op('min', np.min, float_frame_with_na, + float_frame, float_string_frame, + check_dates=True) + _check_stat_op('min', np.min, int_frame, float_frame, + float_string_frame) def test_cummin(self, datetime_frame): datetime_frame.loc[5:10, 0] = nan @@ -767,25 +760,26 @@ def test_max(self, float_frame_with_na, int_frame, float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - assert_stat_op_calc('max', np.max, float_frame_with_na, - check_dates=True) - assert_stat_op_calc('max', np.max, int_frame) - assert_stat_op_api('max', float_frame, float_string_frame) + _check_stat_op('max', np.max, float_frame_with_na, + float_frame, float_string_frame, + check_dates=True) + _check_stat_op('max', np.max, int_frame, float_frame, + float_string_frame) def test_mad(self, float_frame_with_na, float_frame, float_string_frame): f = lambda x: np.abs(x - x.mean()).mean() - assert_stat_op_calc('mad', f, float_frame_with_na) - assert_stat_op_api('mad', float_frame, float_string_frame) + _check_stat_op('mad', f, float_frame_with_na, float_frame, + float_string_frame) def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, float_string_frame): alt = lambda x: np.var(x, ddof=1) - assert_stat_op_calc('var', alt, float_frame_with_na) - assert_stat_op_api('var', float_frame, float_string_frame) + _check_stat_op('var', alt, float_frame_with_na, float_frame, + float_string_frame) alt = lambda x: np.std(x, ddof=1) - assert_stat_op_calc('std', alt, float_frame_with_na) - assert_stat_op_api('std', float_frame, float_string_frame) + _check_stat_op('std', alt, float_frame_with_na, float_frame, + float_string_frame) result = datetime_frame.std(ddof=4) expected = datetime_frame.apply(lambda x: x.std(ddof=4)) @@ -899,8 +893,8 @@ def test_cumprod(self, datetime_frame): def test_sem(self, float_frame_with_na, datetime_frame, float_frame, float_string_frame): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) - assert_stat_op_calc('sem', alt, float_frame_with_na) - assert_stat_op_api('sem', float_frame, float_string_frame) + _check_stat_op('sem', alt, float_frame_with_na, + float_frame, float_string_frame) result = datetime_frame.sem(ddof=4) expected = datetime_frame.apply( @@ -924,8 +918,8 @@ def alt(x): return np.nan return skew(x, bias=False) - assert_stat_op_calc('skew', alt, float_frame_with_na) - assert_stat_op_api('skew', float_frame, float_string_frame) + _check_stat_op('skew', alt, float_frame_with_na, + float_frame, float_string_frame) @td.skip_if_no_scipy def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): @@ -936,8 +930,8 @@ def alt(x): return np.nan return kurtosis(x, bias=False) - assert_stat_op_calc('kurt', alt, float_frame_with_na) - assert_stat_op_api('kurt', float_frame, float_string_frame) + _check_stat_op('kurt', alt, float_frame_with_na, + float_frame, float_string_frame) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], @@ -1212,9 +1206,9 @@ def wrapper(x): return np.nan return np.median(x) - assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, - check_dates=True) - assert_stat_op_api('median', float_frame, float_string_frame) + _check_stat_op('median', wrapper, int_frame, float_frame, + float_string_frame, check_dtype=False, + check_dates=True) # Miscellanea @@ -1269,12 +1263,13 @@ def test_idxmax(self, float_frame, int_frame): # ---------------------------------------------------------------------- # Logical reductions - @pytest.mark.parametrize('opname', ['any', 'all']) - def test_any_all(self, opname, bool_frame_with_na, float_string_frame): - assert_bool_op_calc(opname, getattr(np, opname), bool_frame_with_na, - has_skipna=True) - assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, - has_bool_only=True) + def test_any_all(self, bool_frame_with_na, float_string_frame): + _check_bool_op('any', np.any, bool_frame_with_na, + float_string_frame, has_skipna=True, + has_bool_only=True) + _check_bool_op('all', np.all, bool_frame_with_na, + float_string_frame, has_skipna=True, + has_bool_only=True) def test_any_all_extra(self): df = DataFrame({ From 48272d9398f0b547d77c9e9774c102604ce3156b Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 5 Oct 2018 18:34:49 +0200 Subject: [PATCH 13/14] Revert "Consistent naming of parameters" This reverts commit b043bb4fe6099af5525c0f2ccdc0ceae535eefcb. --- pandas/tests/frame/test_analytics.py | 62 ++++++++++++++-------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 7bd7250fdb975..364aabfbcdc5a 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,22 +25,22 @@ import pandas.util._test_decorators as td -def _check_stat_op(opname, alternative, main_frame, float_frame, +def _check_stat_op(name, alternative, main_frame, float_frame, float_string_frame, has_skipna=True, has_numeric_only=False, check_dtype=True, check_dates=False, check_less_precise=False, skipna_alternative=None): - f = getattr(main_frame, opname) + f = getattr(main_frame, name) if check_dates: df = DataFrame({'b': date_range('1/1/2001', periods=2)}) - _f = getattr(df, opname) + _f = getattr(df, name) result = _f() assert isinstance(result, Series) df['a'] = lrange(len(df)) - result = getattr(df, opname)() + result = getattr(df, name)() assert isinstance(result, Series) assert len(result) @@ -67,7 +67,7 @@ def wrapper(x): tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) - if opname in ['sum', 'prod']: + if name in ['sum', 'prod']: expected = main_frame.apply(skipna_wrapper, axis=1) tm.assert_series_equal(result1, expected, check_dtype=False, check_less_precise=check_less_precise) @@ -84,30 +84,30 @@ def wrapper(x): # all NA case if has_skipna: all_na = float_frame * np.NaN - r0 = getattr(all_na, opname)(axis=0) - r1 = getattr(all_na, opname)(axis=1) - if opname in ['sum', 'prod']: - unit = int(opname == 'prod') + r0 = getattr(all_na, name)(axis=0) + r1 = getattr(all_na, name)(axis=1) + if name in ['sum', 'prod']: + unit = int(name == 'prod') expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) tm.assert_series_equal(r0, expected) expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) tm.assert_series_equal(r1, expected) # make sure works on mixed-type frame - getattr(float_string_frame, opname)(axis=0) - getattr(float_string_frame, opname)(axis=1) + getattr(float_string_frame, name)(axis=0) + getattr(float_string_frame, name)(axis=1) if has_numeric_only: - getattr(float_string_frame, opname)(axis=0, numeric_only=True) - getattr(float_string_frame, opname)(axis=1, numeric_only=True) - getattr(float_frame, opname)(axis=0, numeric_only=False) - getattr(float_frame, opname)(axis=1, numeric_only=False) + getattr(float_string_frame, name)(axis=0, numeric_only=True) + getattr(float_string_frame, name)(axis=1, numeric_only=True) + getattr(float_frame, name)(axis=0, numeric_only=False) + getattr(float_frame, name)(axis=1, numeric_only=False) -def _check_bool_op(opname, alternative, main_frame, float_string_frame, +def _check_bool_op(name, alternative, frame, float_string_frame, has_skipna=True, has_bool_only=False): - f = getattr(main_frame, opname) + f = getattr(frame, name) if has_skipna: def skipna_wrapper(x): @@ -119,8 +119,8 @@ def wrapper(x): result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, main_frame.apply(wrapper)) - tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), + tm.assert_series_equal(result0, frame.apply(wrapper)) + tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), check_dtype=False) # HACK: win32 else: skipna_wrapper = alternative @@ -128,8 +128,8 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) - tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper)) - tm.assert_series_equal(result1, main_frame.apply(skipna_wrapper, axis=1), + tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) + tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), check_dtype=False) # bad axis @@ -137,10 +137,10 @@ def wrapper(x): # all NA case if has_skipna: - all_na = main_frame * np.NaN - r0 = getattr(all_na, opname)(axis=0) - r1 = getattr(all_na, opname)(axis=1) - if opname == 'any': + all_na = frame * np.NaN + r0 = getattr(all_na, name)(axis=0) + r1 = getattr(all_na, name)(axis=1) + if name == 'any': assert not r0.any() assert not r1.any() else: @@ -150,8 +150,8 @@ def wrapper(x): # make sure works on mixed-type frame mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0 - getattr(mixed, opname)(axis=0) - getattr(mixed, opname)(axis=1) + getattr(mixed, name)(axis=0) + getattr(mixed, name)(axis=1) class NonzeroFail(object): @@ -161,10 +161,10 @@ def __nonzero__(self): mixed['_nonzero_fail_'] = NonzeroFail() if has_bool_only: - getattr(mixed, opname)(axis=0, bool_only=True) - getattr(mixed, opname)(axis=1, bool_only=True) - getattr(main_frame, opname)(axis=0, bool_only=False) - getattr(main_frame, opname)(axis=1, bool_only=False) + getattr(mixed, name)(axis=0, bool_only=True) + getattr(mixed, name)(axis=1, bool_only=True) + getattr(frame, name)(axis=0, bool_only=False) + getattr(frame, name)(axis=1, bool_only=False) class TestDataFrameAnalytics(): From c227fa225972983511a536c82b4300f2fd99b30c Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 5 Oct 2018 18:34:51 +0200 Subject: [PATCH 14/14] Revert "Correctly group tests within _check_[stat/bool]_op" This reverts commit 98f324309fd65c2a67bd8de409b9c231b398ff51. --- pandas/tests/frame/test_analytics.py | 43 ++++++++++++++-------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 364aabfbcdc5a..b0b9f2815cbb9 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -80,6 +80,15 @@ def wrapper(x): # bad axis tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) + # make sure works on mixed-type frame + getattr(float_string_frame, name)(axis=0) + getattr(float_string_frame, name)(axis=1) + + if has_numeric_only: + getattr(float_string_frame, name)(axis=0, numeric_only=True) + getattr(float_string_frame, name)(axis=1, numeric_only=True) + getattr(float_frame, name)(axis=0, numeric_only=False) + getattr(float_frame, name)(axis=1, numeric_only=False) # all NA case if has_skipna: @@ -93,16 +102,6 @@ def wrapper(x): expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) tm.assert_series_equal(r1, expected) - # make sure works on mixed-type frame - getattr(float_string_frame, name)(axis=0) - getattr(float_string_frame, name)(axis=1) - - if has_numeric_only: - getattr(float_string_frame, name)(axis=0, numeric_only=True) - getattr(float_string_frame, name)(axis=1, numeric_only=True) - getattr(float_frame, name)(axis=0, numeric_only=False) - getattr(float_frame, name)(axis=1, numeric_only=False) - def _check_bool_op(name, alternative, frame, float_string_frame, has_skipna=True, has_bool_only=False): @@ -135,18 +134,6 @@ def wrapper(x): # bad axis pytest.raises(ValueError, f, axis=2) - # all NA case - if has_skipna: - all_na = frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name == 'any': - assert not r0.any() - assert not r1.any() - else: - assert r0.all() - assert r1.all() - # make sure works on mixed-type frame mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0 @@ -166,6 +153,18 @@ def __nonzero__(self): getattr(frame, name)(axis=0, bool_only=False) getattr(frame, name)(axis=1, bool_only=False) + # all NA case + if has_skipna: + all_na = frame * np.NaN + r0 = getattr(all_na, name)(axis=0) + r1 = getattr(all_na, name)(axis=1) + if name == 'any': + assert not r0.any() + assert not r1.any() + else: + assert r0.all() + assert r1.all() + class TestDataFrameAnalytics():