pandas-dev · jreback · Oct 7, 2018 · Sep 25, 2018 · Sep 25, 2018 · Sep 25, 2018
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -25,22 +25,19 @@
 import pandas.util._test_decorators as td
 
 
-def _check_stat_op(name, alternative, main_frame, float_frame,
-                   float_string_frame, has_skipna=True,
-                   has_numeric_only=False, check_dtype=True,
-                   check_dates=False, check_less_precise=False,
-                   skipna_alternative=None):
+def assert_stat_op_calc(opname, alternative, main_frame, has_skipna=True,
+                        check_dtype=True, check_dates=False,
+                        check_less_precise=False, skipna_alternative=None):
 
-    f = getattr(main_frame, name)
+    f = getattr(main_frame, opname)
 
     if check_dates:
         df = DataFrame({'b': date_range('1/1/2001', periods=2)})
-        _f = getattr(df, name)
-        result = _f()
+        result = getattr(df, opname)()
         assert isinstance(result, Series)
 
         df['a'] = lrange(len(df))
-        result = getattr(df, name)()
+        result = getattr(df, opname)()
         assert isinstance(result, Series)
         assert len(result)
 
@@ -67,7 +64,8 @@ def wrapper(x):
     tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper),
                            check_dtype=check_dtype,
                            check_less_precise=check_less_precise)
-    if name in ['sum', 'prod']:
+
+    if opname in ['sum', 'prod']:
         expected = main_frame.apply(skipna_wrapper, axis=1)
         tm.assert_series_equal(result1, expected, check_dtype=False,
                                check_less_precise=check_less_precise)
@@ -80,33 +78,37 @@ def wrapper(x):
 
     # bad axis
     tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2)
-    # make sure works on mixed-type frame
-    getattr(float_string_frame, name)(axis=0)
-    getattr(float_string_frame, name)(axis=1)
-
-    if has_numeric_only:
-        getattr(float_string_frame, name)(axis=0, numeric_only=True)
-        getattr(float_string_frame, name)(axis=1, numeric_only=True)
-        getattr(float_frame, name)(axis=0, numeric_only=False)
-        getattr(float_frame, name)(axis=1, numeric_only=False)
 
     # all NA case
     if has_skipna:
-        all_na = float_frame * np.NaN
-        r0 = getattr(all_na, name)(axis=0)
-        r1 = getattr(all_na, name)(axis=1)
-        if name in ['sum', 'prod']:
-            unit = int(name == 'prod')
+        all_na = main_frame * np.NaN
+        r0 = getattr(all_na, opname)(axis=0)
+        r1 = getattr(all_na, opname)(axis=1)
+        if opname in ['sum', 'prod']:
+            unit = 1 if opname == 'prod' else 0  # result for empty sum/prod
             expected = pd.Series(unit, index=r0.index, dtype=r0.dtype)
             tm.assert_series_equal(r0, expected)
             expected = pd.Series(unit, index=r1.index, dtype=r1.dtype)
             tm.assert_series_equal(r1, expected)
 
 
-def _check_bool_op(name, alternative, frame, float_string_frame,
-                   has_skipna=True, has_bool_only=False):
+def assert_stat_op_api(opname, float_frame, float_string_frame,
+                       has_numeric_only=False):
+
+    # make sure works on mixed-type frame
+    getattr(float_string_frame, opname)(axis=0)
+    getattr(float_string_frame, opname)(axis=1)
+
+    if has_numeric_only:
+        getattr(float_string_frame, opname)(axis=0, numeric_only=True)
+        getattr(float_string_frame, opname)(axis=1, numeric_only=True)
+        getattr(float_frame, opname)(axis=0, numeric_only=False)
+        getattr(float_frame, opname)(axis=1, numeric_only=False)
+
 
-    f = getattr(frame, name)
+def assert_bool_op_calc(opname, alternative, main_frame, has_skipna=True):
+
+    f = getattr(main_frame, opname)
 
     if has_skipna:
         def skipna_wrapper(x):
@@ -118,27 +120,44 @@ def wrapper(x):
 
         result0 = f(axis=0, skipna=False)
         result1 = f(axis=1, skipna=False)
-        tm.assert_series_equal(result0, frame.apply(wrapper))
-        tm.assert_series_equal(result1, frame.apply(wrapper, axis=1),
+
+        tm.assert_series_equal(result0, main_frame.apply(wrapper))
+        tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1),
                                check_dtype=False)  # HACK: win32
     else:
         skipna_wrapper = alternative
         wrapper = alternative
 
     result0 = f(axis=0)
     result1 = f(axis=1)
-    tm.assert_series_equal(result0, frame.apply(skipna_wrapper))
-    tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1),
+
+    tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper))
+    tm.assert_series_equal(result1, main_frame.apply(skipna_wrapper, axis=1),
                            check_dtype=False)
 
     # bad axis
-    pytest.raises(ValueError, f, axis=2)
+    tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2)
 
-    # make sure works on mixed-type frame
+    # all NA case
+    if has_skipna:
+        all_na = main_frame * np.NaN
+        r0 = getattr(all_na, opname)(axis=0)
+        r1 = getattr(all_na, opname)(axis=1)
+        if opname == 'any':
+            assert not r0.any()
+            assert not r1.any()
+        else:
+            assert r0.all()
+            assert r1.all()
+
+
+def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame,
+                       has_bool_only=False):
+    # make sure op works on mixed-type frame
     mixed = float_string_frame
-    mixed['_bool_'] = np.random.randn(len(mixed)) > 0
-    getattr(mixed, name)(axis=0)
-    getattr(mixed, name)(axis=1)
+    mixed['_bool_'] = np.random.randn(len(mixed)) > 0.5
+    getattr(mixed, opname)(axis=0)
+    getattr(mixed, opname)(axis=1)
 
     class NonzeroFail(object):
 
@@ -148,22 +167,10 @@ def __nonzero__(self):
     mixed['_nonzero_fail_'] = NonzeroFail()
 
     if has_bool_only:
-        getattr(mixed, name)(axis=0, bool_only=True)
-        getattr(mixed, name)(axis=1, bool_only=True)
-        getattr(frame, name)(axis=0, bool_only=False)
-        getattr(frame, name)(axis=1, bool_only=False)
-
-    # all NA case
-    if has_skipna:
-        all_na = frame * np.NaN
-        r0 = getattr(all_na, name)(axis=0)
-        r1 = getattr(all_na, name)(axis=1)
-        if name == 'any':
-            assert not r0.any()
-            assert not r1.any()
-        else:
-            assert r0.all()
-            assert r1.all()
+        getattr(mixed, opname)(axis=0, bool_only=True)
+        getattr(mixed, opname)(axis=1, bool_only=True)
+        getattr(bool_frame_with_na, opname)(axis=0, bool_only=False)
+        getattr(bool_frame_with_na, opname)(axis=1, bool_only=False)
 
 
 class TestDataFrameAnalytics():
@@ -596,10 +603,10 @@ def test_reduce_mixed_frame(self):
 
     def test_count(self, float_frame_with_na, float_frame, float_string_frame):
         f = lambda s: notna(s).sum()
-        _check_stat_op('count', f, float_frame_with_na, float_frame,
-                       float_string_frame, has_skipna=False,
-                       has_numeric_only=True, check_dtype=False,
-                       check_dates=True)
+        assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False,
+                            check_dtype=False, check_dates=True)
+        assert_stat_op_api('count', float_frame, float_string_frame,
+                           has_numeric_only=True)
 
         # corner case
         frame = DataFrame()
@@ -628,9 +635,10 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame):
     def test_nunique(self, float_frame_with_na, float_frame,
                      float_string_frame):
         f = lambda s: len(algorithms.unique1d(s.dropna()))
-        _check_stat_op('nunique', f, float_frame_with_na,
-                       float_frame, float_string_frame, has_skipna=False,
-                       check_dtype=False, check_dates=True)
+        assert_stat_op_calc('nunique', f, float_frame_with_na,
+                            has_skipna=False, check_dtype=False,
+                            check_dates=True)
+        assert_stat_op_api('nunique', float_frame, float_string_frame)
 
         df = DataFrame({'A': [1, 1, 1],
                         'B': [1, 2, 3],
@@ -644,15 +652,13 @@ def test_nunique(self, float_frame_with_na, float_frame,
 
     def test_sum(self, float_frame_with_na, mixed_float_frame,
                  float_frame, float_string_frame):
-        _check_stat_op('sum', np.sum, float_frame_with_na, float_frame,
-                       float_string_frame, has_numeric_only=True,
-                       skipna_alternative=np.nansum)
-
+        assert_stat_op_api('sum', float_frame, float_string_frame,
+                           has_numeric_only=True)
+        assert_stat_op_calc('sum', np.sum, float_frame_with_na,
+                            skipna_alternative=np.nansum)
         # mixed types (with upcasting happening)
-        _check_stat_op('sum', np.sum,
-                       mixed_float_frame.astype('float32'), float_frame,
-                       float_string_frame, has_numeric_only=True,
-                       check_dtype=False, check_less_precise=True)
+        assert_stat_op_calc('sum', np.sum, mixed_float_frame.astype('float32'),
+                            check_dtype=False, check_less_precise=True)
 
     @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var',
                                         'std', 'skew', 'min', 'max'])
@@ -679,13 +685,14 @@ def test_stat_operators_attempt_obj_array(self, method):
                 tm.assert_series_equal(result, expected)
 
     def test_mean(self, float_frame_with_na, float_frame, float_string_frame):
-        _check_stat_op('mean', np.mean, float_frame_with_na,
-                       float_frame, float_string_frame, check_dates=True)
+        assert_stat_op_calc('mean', np.mean, float_frame_with_na,
+                            check_dates=True)
+        assert_stat_op_api('mean', float_frame, float_string_frame)
 
     def test_product(self, float_frame_with_na, float_frame,
                      float_string_frame):
-        _check_stat_op('product', np.prod, float_frame_with_na,
-                       float_frame, float_string_frame)
+        assert_stat_op_calc('product', np.prod, float_frame_with_na)
+        assert_stat_op_api('product', float_frame, float_string_frame)
 
     # TODO: Ensure warning isn't emitted in the first place
     @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning")
@@ -696,18 +703,18 @@ def wrapper(x):
                 return np.nan
             return np.median(x)
 
-        _check_stat_op('median', wrapper, float_frame_with_na,
-                       float_frame, float_string_frame, check_dates=True)
+        assert_stat_op_calc('median', wrapper, float_frame_with_na,
+                            check_dates=True)
+        assert_stat_op_api('median', float_frame, float_string_frame)
 
     def test_min(self, float_frame_with_na, int_frame,
                  float_frame, float_string_frame):
         with warnings.catch_warnings(record=True):
             warnings.simplefilter("ignore", RuntimeWarning)
-            _check_stat_op('min', np.min, float_frame_with_na,
-                           float_frame, float_string_frame,
-                           check_dates=True)
-        _check_stat_op('min', np.min, int_frame, float_frame,
-                       float_string_frame)
+            assert_stat_op_calc('min', np.min, float_frame_with_na,
+                                check_dates=True)
+        assert_stat_op_calc('min', np.min, int_frame)
+        assert_stat_op_api('min', float_frame, float_string_frame)
 
     def test_cummin(self, datetime_frame):
         datetime_frame.loc[5:10, 0] = nan
@@ -759,26 +766,25 @@ def test_max(self, float_frame_with_na, int_frame,
                  float_frame, float_string_frame):
         with warnings.catch_warnings(record=True):
             warnings.simplefilter("ignore", RuntimeWarning)
-            _check_stat_op('max', np.max, float_frame_with_na,
-                           float_frame, float_string_frame,
-                           check_dates=True)
-        _check_stat_op('max', np.max, int_frame, float_frame,
-                       float_string_frame)
+            assert_stat_op_calc('max', np.max, float_frame_with_na,
+                                check_dates=True)
+        assert_stat_op_calc('max', np.max, int_frame)
+        assert_stat_op_api('max', float_frame, float_string_frame)
 
     def test_mad(self, float_frame_with_na, float_frame, float_string_frame):
         f = lambda x: np.abs(x - x.mean()).mean()
-        _check_stat_op('mad', f, float_frame_with_na, float_frame,
-                       float_string_frame)
+        assert_stat_op_calc('mad', f, float_frame_with_na)
+        assert_stat_op_api('mad', float_frame, float_string_frame)
 
     def test_var_std(self, float_frame_with_na, datetime_frame, float_frame,
                      float_string_frame):
         alt = lambda x: np.var(x, ddof=1)
-        _check_stat_op('var', alt, float_frame_with_na, float_frame,
-                       float_string_frame)
+        assert_stat_op_calc('var', alt, float_frame_with_na)
+        assert_stat_op_api('var', float_frame, float_string_frame)
 
         alt = lambda x: np.std(x, ddof=1)
-        _check_stat_op('std', alt, float_frame_with_na, float_frame,
-                       float_string_frame)
+        assert_stat_op_calc('std', alt, float_frame_with_na)
+        assert_stat_op_api('std', float_frame, float_string_frame)
 
         result = datetime_frame.std(ddof=4)
         expected = datetime_frame.apply(lambda x: x.std(ddof=4))
@@ -892,8 +898,8 @@ def test_cumprod(self, datetime_frame):
     def test_sem(self, float_frame_with_na, datetime_frame,
                  float_frame, float_string_frame):
         alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x))
-        _check_stat_op('sem', alt, float_frame_with_na,
-                       float_frame, float_string_frame)
+        assert_stat_op_calc('sem', alt, float_frame_with_na)
+        assert_stat_op_api('sem', float_frame, float_string_frame)
 
         result = datetime_frame.sem(ddof=4)
         expected = datetime_frame.apply(
@@ -917,8 +923,8 @@ def alt(x):
                 return np.nan
             return skew(x, bias=False)
 
-        _check_stat_op('skew', alt, float_frame_with_na,
-                       float_frame, float_string_frame)
+        assert_stat_op_calc('skew', alt, float_frame_with_na)
+        assert_stat_op_api('skew', float_frame, float_string_frame)
 
     @td.skip_if_no_scipy
     def test_kurt(self, float_frame_with_na, float_frame, float_string_frame):
@@ -929,8 +935,8 @@ def alt(x):
                 return np.nan
             return kurtosis(x, bias=False)
 
-        _check_stat_op('kurt', alt, float_frame_with_na,
-                       float_frame, float_string_frame)
+        assert_stat_op_calc('kurt', alt, float_frame_with_na)
+        assert_stat_op_api('kurt', float_frame, float_string_frame)
 
         index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
                            labels=[[0, 0, 0, 0, 0, 0],
@@ -1205,9 +1211,9 @@ def wrapper(x):
                 return np.nan
             return np.median(x)
 
-        _check_stat_op('median', wrapper, int_frame, float_frame,
-                       float_string_frame, check_dtype=False,
-                       check_dates=True)
+        assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False,
+                            check_dates=True)
+        assert_stat_op_api('median', float_frame, float_string_frame)
 
     # Miscellanea
 
@@ -1262,13 +1268,12 @@ def test_idxmax(self, float_frame, int_frame):
     # ----------------------------------------------------------------------
     # Logical reductions
 
-    def test_any_all(self, bool_frame_with_na, float_string_frame):
-        _check_bool_op('any', np.any, bool_frame_with_na,
-                       float_string_frame, has_skipna=True,
-                       has_bool_only=True)
-        _check_bool_op('all', np.all, bool_frame_with_na,
-                       float_string_frame, has_skipna=True,
-                       has_bool_only=True)
+    @pytest.mark.parametrize('opname', ['any', 'all'])
+    def test_any_all(self, opname, bool_frame_with_na, float_string_frame):
+        assert_bool_op_calc(opname, getattr(np, opname), bool_frame_with_na,
+                            has_skipna=True)
+        assert_bool_op_api(opname, bool_frame_with_na, float_string_frame,
+                           has_bool_only=True)
 
     def test_any_all_extra(self):
         df = DataFrame({