From 4ba952e9bca41125729b2a3e5f0e45c338cf2833 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 25 Sep 2018 18:42:56 +0200 Subject: [PATCH 1/5] Correctly group tests within _check_[stat/bool]_op --- pandas/tests/frame/test_analytics.py | 43 ++++++++++++++-------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index b0b9f2815cbb9..364aabfbcdc5a 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -80,15 +80,6 @@ def wrapper(x): # bad axis tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) - # make sure works on mixed-type frame - getattr(float_string_frame, name)(axis=0) - getattr(float_string_frame, name)(axis=1) - - if has_numeric_only: - getattr(float_string_frame, name)(axis=0, numeric_only=True) - getattr(float_string_frame, name)(axis=1, numeric_only=True) - getattr(float_frame, name)(axis=0, numeric_only=False) - getattr(float_frame, name)(axis=1, numeric_only=False) # all NA case if has_skipna: @@ -102,6 +93,16 @@ def wrapper(x): expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) tm.assert_series_equal(r1, expected) + # make sure works on mixed-type frame + getattr(float_string_frame, name)(axis=0) + getattr(float_string_frame, name)(axis=1) + + if has_numeric_only: + getattr(float_string_frame, name)(axis=0, numeric_only=True) + getattr(float_string_frame, name)(axis=1, numeric_only=True) + getattr(float_frame, name)(axis=0, numeric_only=False) + getattr(float_frame, name)(axis=1, numeric_only=False) + def _check_bool_op(name, alternative, frame, float_string_frame, has_skipna=True, has_bool_only=False): @@ -134,6 +135,18 @@ def wrapper(x): # bad axis pytest.raises(ValueError, f, axis=2) + # all NA case + if has_skipna: + all_na = frame * np.NaN + r0 = getattr(all_na, name)(axis=0) + r1 = getattr(all_na, name)(axis=1) + if name == 'any': + assert not r0.any() + assert not r1.any() + else: + 
assert r0.all() + assert r1.all() + # make sure works on mixed-type frame mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0 @@ -153,18 +166,6 @@ def __nonzero__(self): getattr(frame, name)(axis=0, bool_only=False) getattr(frame, name)(axis=1, bool_only=False) - # all NA case - if has_skipna: - all_na = frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name == 'any': - assert not r0.any() - assert not r1.any() - else: - assert r0.all() - assert r1.all() - class TestDataFrameAnalytics(): From 2dbb1f87dd1043e794d167ac6d7ebd2de4449b80 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 25 Sep 2018 18:44:16 +0200 Subject: [PATCH 2/5] Consistent naming of parameters --- pandas/tests/frame/test_analytics.py | 62 ++++++++++++++-------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 364aabfbcdc5a..7bd7250fdb975 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,22 +25,22 @@ import pandas.util._test_decorators as td -def _check_stat_op(name, alternative, main_frame, float_frame, +def _check_stat_op(opname, alternative, main_frame, float_frame, float_string_frame, has_skipna=True, has_numeric_only=False, check_dtype=True, check_dates=False, check_less_precise=False, skipna_alternative=None): - f = getattr(main_frame, name) + f = getattr(main_frame, opname) if check_dates: df = DataFrame({'b': date_range('1/1/2001', periods=2)}) - _f = getattr(df, name) + _f = getattr(df, opname) result = _f() assert isinstance(result, Series) df['a'] = lrange(len(df)) - result = getattr(df, name)() + result = getattr(df, opname)() assert isinstance(result, Series) assert len(result) @@ -67,7 +67,7 @@ def wrapper(x): tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) - if name in ['sum', 
'prod']: + if opname in ['sum', 'prod']: expected = main_frame.apply(skipna_wrapper, axis=1) tm.assert_series_equal(result1, expected, check_dtype=False, check_less_precise=check_less_precise) @@ -84,30 +84,30 @@ def wrapper(x): # all NA case if has_skipna: all_na = float_frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name in ['sum', 'prod']: - unit = int(name == 'prod') + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname in ['sum', 'prod']: + unit = int(opname == 'prod') expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) tm.assert_series_equal(r0, expected) expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) tm.assert_series_equal(r1, expected) # make sure works on mixed-type frame - getattr(float_string_frame, name)(axis=0) - getattr(float_string_frame, name)(axis=1) + getattr(float_string_frame, opname)(axis=0) + getattr(float_string_frame, opname)(axis=1) if has_numeric_only: - getattr(float_string_frame, name)(axis=0, numeric_only=True) - getattr(float_string_frame, name)(axis=1, numeric_only=True) - getattr(float_frame, name)(axis=0, numeric_only=False) - getattr(float_frame, name)(axis=1, numeric_only=False) + getattr(float_string_frame, opname)(axis=0, numeric_only=True) + getattr(float_string_frame, opname)(axis=1, numeric_only=True) + getattr(float_frame, opname)(axis=0, numeric_only=False) + getattr(float_frame, opname)(axis=1, numeric_only=False) -def _check_bool_op(name, alternative, frame, float_string_frame, +def _check_bool_op(opname, alternative, main_frame, float_string_frame, has_skipna=True, has_bool_only=False): - f = getattr(frame, name) + f = getattr(main_frame, opname) if has_skipna: def skipna_wrapper(x): @@ -119,8 +119,8 @@ def wrapper(x): result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, frame.apply(wrapper)) - tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), + 
tm.assert_series_equal(result0, main_frame.apply(wrapper)) + tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), check_dtype=False) # HACK: win32 else: skipna_wrapper = alternative @@ -128,8 +128,8 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) - tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) - tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), + tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper)) + tm.assert_series_equal(result1, main_frame.apply(skipna_wrapper, axis=1), check_dtype=False) # bad axis @@ -137,10 +137,10 @@ def wrapper(x): # all NA case if has_skipna: - all_na = frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name == 'any': + all_na = main_frame * np.NaN + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname == 'any': assert not r0.any() assert not r1.any() else: @@ -150,8 +150,8 @@ def wrapper(x): # make sure works on mixed-type frame mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0 - getattr(mixed, name)(axis=0) - getattr(mixed, name)(axis=1) + getattr(mixed, opname)(axis=0) + getattr(mixed, opname)(axis=1) class NonzeroFail(object): @@ -161,10 +161,10 @@ def __nonzero__(self): mixed['_nonzero_fail_'] = NonzeroFail() if has_bool_only: - getattr(mixed, name)(axis=0, bool_only=True) - getattr(mixed, name)(axis=1, bool_only=True) - getattr(frame, name)(axis=0, bool_only=False) - getattr(frame, name)(axis=1, bool_only=False) + getattr(mixed, opname)(axis=0, bool_only=True) + getattr(mixed, opname)(axis=1, bool_only=True) + getattr(main_frame, opname)(axis=0, bool_only=False) + getattr(main_frame, opname)(axis=1, bool_only=False) class TestDataFrameAnalytics(): From fb8f0fa0076a37517882ac99c00b264902f01acc Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Tue, 25 Sep 2018 18:52:45 +0200 Subject: [PATCH 3/5] Break up _check_[stat/bool]_op --- pandas/tests/frame/test_analytics.py | 133 ++++++++++++++------------- 1 file changed, 69 insertions(+), 64 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 7bd7250fdb975..e980ca3e08ddd 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,11 +25,9 @@ import pandas.util._test_decorators as td -def _check_stat_op(opname, alternative, main_frame, float_frame, - float_string_frame, has_skipna=True, - has_numeric_only=False, check_dtype=True, - check_dates=False, check_less_precise=False, - skipna_alternative=None): +def assert_stat_op_calc(opname, alternative, main_frame, has_skipna=True, + check_dtype=True, check_dates=False, + check_less_precise=False, skipna_alternative=None): f = getattr(main_frame, opname) @@ -67,6 +65,7 @@ def wrapper(x): tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) + if opname in ['sum', 'prod']: expected = main_frame.apply(skipna_wrapper, axis=1) tm.assert_series_equal(result1, expected, check_dtype=False, @@ -83,7 +82,7 @@ def wrapper(x): # all NA case if has_skipna: - all_na = float_frame * np.NaN + all_na = main_frame * np.NaN r0 = getattr(all_na, opname)(axis=0) r1 = getattr(all_na, opname)(axis=1) if opname in ['sum', 'prod']: @@ -93,6 +92,10 @@ def wrapper(x): expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) tm.assert_series_equal(r1, expected) + +def assert_stat_op_api(opname, float_frame, float_string_frame, + has_numeric_only=False): + # make sure works on mixed-type frame getattr(float_string_frame, opname)(axis=0) getattr(float_string_frame, opname)(axis=1) @@ -104,8 +107,7 @@ def wrapper(x): getattr(float_frame, opname)(axis=1, numeric_only=False) -def _check_bool_op(opname, alternative, main_frame, float_string_frame, - 
has_skipna=True, has_bool_only=False): +def assert_bool_op_calc(opname, alternative, main_frame, has_skipna=True): f = getattr(main_frame, opname) @@ -119,6 +121,7 @@ def wrapper(x): result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) + tm.assert_series_equal(result0, main_frame.apply(wrapper)) tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), check_dtype=False) # HACK: win32 @@ -128,6 +131,7 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) + tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper)) tm.assert_series_equal(result1, main_frame.apply(skipna_wrapper, axis=1), check_dtype=False) @@ -147,7 +151,10 @@ def wrapper(x): assert r0.all() assert r1.all() - # make sure works on mixed-type frame + +def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, + has_bool_only=False): + # make sure op works on mixed-type frame mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0 getattr(mixed, opname)(axis=0) @@ -163,8 +170,8 @@ def __nonzero__(self): if has_bool_only: getattr(mixed, opname)(axis=0, bool_only=True) getattr(mixed, opname)(axis=1, bool_only=True) - getattr(main_frame, opname)(axis=0, bool_only=False) - getattr(main_frame, opname)(axis=1, bool_only=False) + getattr(bool_frame_with_na, opname)(axis=0, bool_only=False) + getattr(bool_frame_with_na, opname)(axis=1, bool_only=False) class TestDataFrameAnalytics(): @@ -597,10 +604,10 @@ def test_reduce_mixed_frame(self): def test_count(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: notna(s).sum() - _check_stat_op('count', f, float_frame_with_na, float_frame, - float_string_frame, has_skipna=False, - has_numeric_only=True, check_dtype=False, - check_dates=True) + assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, + check_dtype=False, check_dates=True) + assert_stat_op_api('count', float_frame, float_string_frame, + has_numeric_only=True) # corner case frame = DataFrame() @@ 
-629,9 +636,10 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): def test_nunique(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: len(algorithms.unique1d(s.dropna())) - _check_stat_op('nunique', f, float_frame_with_na, - float_frame, float_string_frame, has_skipna=False, - check_dtype=False, check_dates=True) + assert_stat_op_calc('nunique', f, float_frame_with_na, + has_skipna=False, check_dtype=False, + check_dates=True) + assert_stat_op_api('nunique', float_frame, float_string_frame) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], @@ -645,15 +653,13 @@ def test_nunique(self, float_frame_with_na, float_frame, def test_sum(self, float_frame_with_na, mixed_float_frame, float_frame, float_string_frame): - _check_stat_op('sum', np.sum, float_frame_with_na, float_frame, - float_string_frame, has_numeric_only=True, - skipna_alternative=np.nansum) - + assert_stat_op_api('sum', float_frame, float_string_frame, + has_numeric_only=True) + assert_stat_op_calc('sum', np.sum, float_frame_with_na, + skipna_alternative=np.nansum) # mixed types (with upcasting happening) - _check_stat_op('sum', np.sum, - mixed_float_frame.astype('float32'), float_frame, - float_string_frame, has_numeric_only=True, - check_dtype=False, check_less_precise=True) + assert_stat_op_calc('sum', np.sum, mixed_float_frame.astype('float32'), + check_dtype=False, check_less_precise=True) @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']) @@ -680,13 +686,14 @@ def test_stat_operators_attempt_obj_array(self, method): tm.assert_series_equal(result, expected) def test_mean(self, float_frame_with_na, float_frame, float_string_frame): - _check_stat_op('mean', np.mean, float_frame_with_na, - float_frame, float_string_frame, check_dates=True) + assert_stat_op_calc('mean', np.mean, float_frame_with_na, + check_dates=True) + assert_stat_op_api('mean', float_frame, float_string_frame) def test_product(self, 
float_frame_with_na, float_frame, float_string_frame): - _check_stat_op('product', np.prod, float_frame_with_na, - float_frame, float_string_frame) + assert_stat_op_calc('product', np.prod, float_frame_with_na) + assert_stat_op_api('product', float_frame, float_string_frame) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") @@ -697,18 +704,18 @@ def wrapper(x): return np.nan return np.median(x) - _check_stat_op('median', wrapper, float_frame_with_na, - float_frame, float_string_frame, check_dates=True) + assert_stat_op_calc('median', wrapper, float_frame_with_na, + check_dates=True) + assert_stat_op_api('median', float_frame, float_string_frame) def test_min(self, float_frame_with_na, int_frame, float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - _check_stat_op('min', np.min, float_frame_with_na, - float_frame, float_string_frame, - check_dates=True) - _check_stat_op('min', np.min, int_frame, float_frame, - float_string_frame) + assert_stat_op_calc('min', np.min, float_frame_with_na, + check_dates=True) + assert_stat_op_calc('min', np.min, int_frame) + assert_stat_op_api('min', float_frame, float_string_frame) def test_cummin(self, datetime_frame): datetime_frame.loc[5:10, 0] = nan @@ -760,26 +767,25 @@ def test_max(self, float_frame_with_na, int_frame, float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - _check_stat_op('max', np.max, float_frame_with_na, - float_frame, float_string_frame, - check_dates=True) - _check_stat_op('max', np.max, int_frame, float_frame, - float_string_frame) + assert_stat_op_calc('max', np.max, float_frame_with_na, + check_dates=True) + assert_stat_op_calc('max', np.max, int_frame) + assert_stat_op_api('max', float_frame, float_string_frame) def test_mad(self, float_frame_with_na, float_frame, float_string_frame): f = lambda 
x: np.abs(x - x.mean()).mean() - _check_stat_op('mad', f, float_frame_with_na, float_frame, - float_string_frame) + assert_stat_op_calc('mad', f, float_frame_with_na) + assert_stat_op_api('mad', float_frame, float_string_frame) def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, float_string_frame): alt = lambda x: np.var(x, ddof=1) - _check_stat_op('var', alt, float_frame_with_na, float_frame, - float_string_frame) + assert_stat_op_calc('var', alt, float_frame_with_na) + assert_stat_op_api('var', float_frame, float_string_frame) alt = lambda x: np.std(x, ddof=1) - _check_stat_op('std', alt, float_frame_with_na, float_frame, - float_string_frame) + assert_stat_op_calc('std', alt, float_frame_with_na) + assert_stat_op_api('std', float_frame, float_string_frame) result = datetime_frame.std(ddof=4) expected = datetime_frame.apply(lambda x: x.std(ddof=4)) @@ -893,8 +899,8 @@ def test_cumprod(self, datetime_frame): def test_sem(self, float_frame_with_na, datetime_frame, float_frame, float_string_frame): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) - _check_stat_op('sem', alt, float_frame_with_na, - float_frame, float_string_frame) + assert_stat_op_calc('sem', alt, float_frame_with_na) + assert_stat_op_api('sem', float_frame, float_string_frame) result = datetime_frame.sem(ddof=4) expected = datetime_frame.apply( @@ -918,8 +924,8 @@ def alt(x): return np.nan return skew(x, bias=False) - _check_stat_op('skew', alt, float_frame_with_na, - float_frame, float_string_frame) + assert_stat_op_calc('skew', alt, float_frame_with_na) + assert_stat_op_api('skew', float_frame, float_string_frame) @td.skip_if_no_scipy def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): @@ -930,8 +936,8 @@ def alt(x): return np.nan return kurtosis(x, bias=False) - _check_stat_op('kurt', alt, float_frame_with_na, - float_frame, float_string_frame) + assert_stat_op_calc('kurt', alt, float_frame_with_na) + assert_stat_op_api('kurt', float_frame, 
float_string_frame) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], @@ -1206,9 +1212,9 @@ def wrapper(x): return np.nan return np.median(x) - _check_stat_op('median', wrapper, int_frame, float_frame, - float_string_frame, check_dtype=False, - check_dates=True) + assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, + check_dates=True) + assert_stat_op_api('median', float_frame, float_string_frame) # Miscellanea @@ -1263,13 +1269,12 @@ def test_idxmax(self, float_frame, int_frame): # ---------------------------------------------------------------------- # Logical reductions - def test_any_all(self, bool_frame_with_na, float_string_frame): - _check_bool_op('any', np.any, bool_frame_with_na, - float_string_frame, has_skipna=True, - has_bool_only=True) - _check_bool_op('all', np.all, bool_frame_with_na, - float_string_frame, has_skipna=True, - has_bool_only=True) + @pytest.mark.parametrize('opname', ['any', 'all']) + def test_any_all(self, opname, bool_frame_with_na, float_string_frame): + assert_bool_op_calc(opname, getattr(np, opname), bool_frame_with_na, + has_skipna=True) + assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, + has_bool_only=True) def test_any_all_extra(self): df = DataFrame({ From a7274b92a0f04806d913d17fa1cd8f9748085de6 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Tue, 25 Sep 2018 18:53:59 +0200 Subject: [PATCH 4/5] Final touches --- pandas/tests/frame/test_analytics.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index e980ca3e08ddd..c4c4142b357ab 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -33,8 +33,7 @@ def assert_stat_op_calc(opname, alternative, main_frame, has_skipna=True, if check_dates: df = DataFrame({'b': date_range('1/1/2001', periods=2)}) - _f = getattr(df, opname) - result = _f() + result = getattr(df, opname)() assert isinstance(result, Series) df['a'] = lrange(len(df)) @@ -86,7 +85,7 @@ def wrapper(x): r0 = getattr(all_na, opname)(axis=0) r1 = getattr(all_na, opname)(axis=1) if opname in ['sum', 'prod']: - unit = int(opname == 'prod') + unit = 1 if opname == 'prod' else 0 # result for empty sum/prod expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) tm.assert_series_equal(r0, expected) expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) @@ -137,7 +136,7 @@ def wrapper(x): check_dtype=False) # bad axis - pytest.raises(ValueError, f, axis=2) + tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) # all NA case if has_skipna: @@ -156,7 +155,7 @@ def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, has_bool_only=False): # make sure op works on mixed-type frame mixed = float_string_frame - mixed['_bool_'] = np.random.randn(len(mixed)) > 0 + mixed['_bool_'] = np.random.randn(len(mixed)) > 0.5 getattr(mixed, opname)(axis=0) getattr(mixed, opname)(axis=1) From 4dd7f90d296c66912235f5b435fca0d3505ad877 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sun, 7 Oct 2018 01:59:51 +0200 Subject: [PATCH 5/5] Review (jreback) --- pandas/tests/frame/test_analytics.py | 100 ++++++++++++++++++++++----- 1 file changed, 84 insertions(+), 16 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index c4c4142b357ab..5327e3fcbea76 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,11 +25,36 @@ import pandas.util._test_decorators as td -def assert_stat_op_calc(opname, alternative, main_frame, has_skipna=True, +def assert_stat_op_calc(opname, alternative, frame, has_skipna=True, check_dtype=True, check_dates=False, check_less_precise=False, skipna_alternative=None): - - f = getattr(main_frame, opname) + """ + Check that operator opname works as advertised on frame + + Parameters + ---------- + opname : string + Name of the operator to test on frame + alternative : function + Function that opname is tested against; i.e. "frame.opname()" should + equal "alternative(frame)". + frame : DataFrame + The object that the tests are executed on + has_skipna : bool, default True + Whether the method "opname" has the kwarg "skipna" + check_dtype : bool, default True + Whether the dtypes of the result of "frame.opname()" and + "alternative(frame)" should be checked. 
+ check_dates : bool, default False + Whether opname should be tested on a Datetime Series + check_less_precise : bool, default False + Whether results should only be compared approximately; + passed on to tm.assert_series_equal + skipna_alternative : function, default None + NaN-safe version of alternative + """ + + f = getattr(frame, opname) if check_dates: df = DataFrame({'b': date_range('1/1/2001', periods=2)}) @@ -49,11 +74,11 @@ def wrapper(x): skipna_alternative) result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, main_frame.apply(wrapper), + tm.assert_series_equal(result0, frame.apply(wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) # HACK: win32 - tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), + tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), check_dtype=False, check_less_precise=check_less_precise) else: @@ -61,18 +86,18 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) - tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), + tm.assert_series_equal(result0, frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) if opname in ['sum', 'prod']: - expected = main_frame.apply(skipna_wrapper, axis=1) + expected = frame.apply(skipna_wrapper, axis=1) tm.assert_series_equal(result1, expected, check_dtype=False, check_less_precise=check_less_precise) # check dtypes if check_dtype: - lcd_dtype = main_frame.values.dtype + lcd_dtype = frame.values.dtype assert lcd_dtype == result0.dtype assert lcd_dtype == result1.dtype @@ -81,7 +106,7 @@ def wrapper(x): # all NA case if has_skipna: - all_na = main_frame * np.NaN + all_na = frame * np.NaN r0 = getattr(all_na, opname)(axis=0) r1 = getattr(all_na, opname)(axis=1) if opname in ['sum', 'prod']: @@ -94,6 +119,20 @@ def wrapper(x): def assert_stat_op_api(opname, float_frame, float_string_frame, has_numeric_only=False): + """ + Check that API for operator 
opname works as advertised on frame + + Parameters + ---------- + opname : string + Name of the operator to test on frame + float_frame : DataFrame + DataFrame with columns of type float + float_string_frame : DataFrame + DataFrame with both float and string columns + has_numeric_only : bool, default False + Whether the method "opname" has the kwarg "numeric_only" + """ # make sure works on mixed-type frame getattr(float_string_frame, opname)(axis=0) @@ -106,9 +145,24 @@ def assert_stat_op_api(opname, float_frame, float_string_frame, getattr(float_frame, opname)(axis=1, numeric_only=False) -def assert_bool_op_calc(opname, alternative, main_frame, has_skipna=True): +def assert_bool_op_calc(opname, alternative, frame, has_skipna=True): + """ + Check that bool operator opname works as advertised on frame + + Parameters + ---------- + opname : string + Name of the operator to test on frame + alternative : function + Function that opname is tested against; i.e. "frame.opname()" should + equal "alternative(frame)". 
+ frame : DataFrame + The object that the tests are executed on + has_skipna : bool, default True + Whether the method "opname" has the kwarg "skipna" + """ - f = getattr(main_frame, opname) + f = getattr(frame, opname) if has_skipna: def skipna_wrapper(x): @@ -121,8 +175,8 @@ def wrapper(x): result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, main_frame.apply(wrapper)) - tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), + tm.assert_series_equal(result0, frame.apply(wrapper)) + tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), check_dtype=False) # HACK: win32 else: skipna_wrapper = alternative @@ -131,8 +185,8 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) - tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper)) - tm.assert_series_equal(result1, main_frame.apply(skipna_wrapper, axis=1), + tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) + tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), check_dtype=False) # bad axis @@ -140,7 +194,7 @@ def wrapper(x): # all NA case if has_skipna: - all_na = main_frame * np.NaN + all_na = frame * np.NaN r0 = getattr(all_na, opname)(axis=0) r1 = getattr(all_na, opname)(axis=1) if opname == 'any': @@ -153,6 +207,20 @@ def wrapper(x): def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, has_bool_only=False): + """ + Check that API for boolean operator opname works as advertised on frame + + Parameters + ---------- + opname : string + Name of the operator to test on frame + bool_frame_with_na : DataFrame + DataFrame that opname is called on with bool_only=False + float_string_frame : DataFrame + DataFrame with both float and string columns + has_bool_only : bool, default False + Whether the method "opname" has the kwarg "bool_only" + """ # make sure op works on mixed-type frame mixed = float_string_frame mixed['_bool_'] = np.random.randn(len(mixed)) > 0.5