From aa1b4575778487670c188919f4e627d0db04ec80 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 18 May 2018 19:41:38 +0100 Subject: [PATCH 1/6] add np.nan funcs to _cython_table --- doc/source/whatsnew/v0.23.1.txt | 5 ++++ pandas/core/base.py | 23 +++++++++++++++--- pandas/tests/test_nanops.py | 43 +++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 5a553264e828b..71db5c4812eef 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -94,3 +94,8 @@ Categorical ^^^^^^^^^^^ - + +Numeric +^^^^^^^ + +- :meth:`~DataFrame.agg` now correctly handles numpy NaN-aware methods like :meth:`numpy.nansum` (:issue:`19629`) diff --git a/pandas/core/base.py b/pandas/core/base.py index c331ead8d2fef..d790f3e834c18 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -22,7 +22,8 @@ from pandas.core import common as com, algorithms import pandas.core.nanops as nanops import pandas._libs.lib as lib -from pandas.compat.numpy import function as nv +from pandas.compat.numpy import (function as nv, _np_version_under1p10, + _np_version_under1p12) from pandas.compat import PYPY from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) @@ -191,17 +192,31 @@ class SelectionMixin(object): np.all: 'all', np.any: 'any', np.sum: 'sum', + np.nansum: 'sum', np.mean: 'mean', + np.nanmean: 'mean', np.prod: 'prod', np.std: 'std', + np.nanstd: 'std', np.var: 'var', + np.nanvar: 'var', np.median: 'median', + np.nanmedian: 'median', np.max: 'max', + np.nanmax: 'max', np.min: 'min', + np.nanmin: 'min', np.cumprod: 'cumprod', - np.cumsum: 'cumsum' + np.cumsum: 'cumsum', } + if not _np_version_under1p10: + _cython_table[np.nanprod] = 'prod' + + if not _np_version_under1p12: + _cython_table[np.nancumprod] = 'cumprod' + _cython_table[np.nancumsum] = 'cumsum' + @property def _selection_name(self): """ @@ -553,8 +568,8 @@ def is_any_frame(): result = None f = self._is_cython_func(arg) - if f and not args and not kwargs: - return getattr(self, f)(), None + if f: + return getattr(self, f)(*args, **kwargs), None # caller can react return result, True diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index a70ee80aee180..5487ecdb2083f 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -994,6 +994,49 @@ def prng(self): return np.random.RandomState(1234) +@pytest.fixture(params=[ + pd.Series([1, 2, 3, 4, 5, 6]), + pd.DataFrame([[1, 2, 3], [4, 5, 6]]) +]) +def nan_test_object(request): + return request.param + + +@pytest.mark.parametrize("standard, nan_method", [ + (np.sum, np.nansum), + (np.mean, np.nanmean), + (np.std, np.nanstd), + (np.var, np.nanvar), + (np.median, np.nanmedian), + (np.max, np.nanmax), + (np.min, np.nanmin), +]) +def test_np_nan_functions(standard, nan_method, nan_test_object): + tm.assert_almost_equal(nan_test_object.agg(standard), + nan_test_object.agg(nan_method), + check_exact=True) + + +@td.skip_if_no("numpy", min_version="1.10.0") +def test_np_nanprod(nan_test_object): + tm.assert_almost_equal(nan_test_object.agg(np.prod), + nan_test_object.agg(np.nanprod), + check_exact=True) + + +@td.skip_if_no("numpy", min_version="1.12.0") +def test_np_nancumprod(nan_test_object): + # Not using pytest params for methods as will fail at build time + methods = [ + (np.cumprod, np.nancumprod), + (np.cumsum, np.nancumsum) + ] + for standard, nan_method in methods: + tm.assert_almost_equal(nan_test_object.agg(standard), + nan_test_object.agg(nan_method), + check_exact=True) + + def test_use_bottleneck(): if nanops._BOTTLENECK_INSTALLED: From 39e2e59656b3850d1e1b642084aa7448b3b455cb Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 19 May 2018 09:49:58 +0100 Subject: [PATCH 2/6] test changes --- pandas/core/base.py | 2 +- pandas/tests/test_nanops.py | 48 ++++++++++++++++++++----------------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index d790f3e834c18..26ff512538be2 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -568,7 +568,7 @@ def is_any_frame(): result = None f = self._is_cython_func(arg) - if f: + if f is not None: return getattr(self, f)(*args, **kwargs), None # caller can react diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 5487ecdb2083f..b59baf96f671f 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -13,7 +13,8 @@ import pandas.core.nanops as nanops import pandas.util.testing as tm import pandas.util._test_decorators as td -from pandas.compat.numpy import _np_version_under1p13 +from pandas.compat.numpy import (_np_version_under1p13, _np_version_under1p10, + _np_version_under1p12) use_bn = nanops._USE_BOTTLENECK @@ -995,10 +996,16 @@ def prng(self): @pytest.fixture(params=[ - pd.Series([1, 2, 3, 4, 5, 6]), - pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + pd.Series([1, 2, 3, 4]), + pd.DataFrame([[1, 2], [3, 4]]), + pd.Series([np.nan, 2, 3, 4]), + pd.DataFrame([[np.nan, 2], [3, 4]]), + pd.Series(), + pd.DataFrame(), + pd.Series([np.nan]), + pd.DataFrame([[np.nan]]), ]) -def nan_test_object(request): +def series_or_frame(request): return request.param @@ -1010,30 +1017,27 @@ def nan_test_object(request): (np.median, np.nanmedian), (np.max, np.nanmax), (np.min, np.nanmin), -]) -def test_np_nan_functions(standard, nan_method, nan_test_object): - tm.assert_almost_equal(nan_test_object.agg(standard), - nan_test_object.agg(nan_method), +], ids=lambda x: x.__name__) +def test_np_nan_functions(standard, nan_method, series_or_frame): + tm.assert_almost_equal(series_or_frame.agg(standard), + series_or_frame.agg(nan_method), check_exact=True) -@td.skip_if_no("numpy", min_version="1.10.0") -def test_np_nanprod(nan_test_object): - tm.assert_almost_equal(nan_test_object.agg(np.prod), - nan_test_object.agg(np.nanprod), +@pytest.mark.skipif(_np_version_under1p10, reason="requires numpy>=1.10") +def test_np_nanprod(series_or_frame): + tm.assert_almost_equal(series_or_frame.agg(np.prod), + series_or_frame.agg(np.nanprod), check_exact=True) -@td.skip_if_no("numpy", min_version="1.12.0") -def test_np_nancumprod(nan_test_object): - # Not using pytest params for methods as will fail at build time - methods = [ - (np.cumprod, np.nancumprod), - (np.cumsum, np.nancumsum) - ] - for standard, nan_method in methods: - tm.assert_almost_equal(nan_test_object.agg(standard), - nan_test_object.agg(nan_method), +@pytest.mark.skipif(_np_version_under1p12, reason="requires numpy>=1.12") +def test_np_nancumprod(series_or_frame): + funcs = [(np.cumprod, np.nancumprod), + (np.cumsum, np.nancumsum)] + for standard, nan_method in funcs: + tm.assert_almost_equal(series_or_frame.agg(standard), + series_or_frame.agg(nan_method), check_exact=True) From 5ec7e18927bd4fe615861cfd72a0cd5276a91e45 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 19 May 2018 15:11:28 +0100 Subject: [PATCH 3/6] add tests for func input to .agg to TestDataFrameAggregate and TestSeriesAggregate --- doc/source/whatsnew/v0.23.1.txt | 1 + pandas/conftest.py | 17 ++++++++++ pandas/core/base.py | 56 +++++++++++++++++-------------- pandas/core/frame.py | 10 +++--- pandas/core/groupby/groupby.py | 5 ++- pandas/tests/frame/test_apply.py | 23 +++++++++++++ pandas/tests/series/test_apply.py | 20 +++++++++++ pandas/tests/test_nanops.py | 49 +-------------------------- 8 files changed, 100 insertions(+), 81 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 71db5c4812eef..e08f9809c8f92 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -99,3 +99,4 @@ Numeric ^^^^^^^ - :meth:`~DataFrame.agg` now correctly handles numpy NaN-aware methods like :meth:`numpy.nansum` (:issue:`19629`) +- :meth:`~DataFrame.agg` now correctly handles built-in methods like ``sum`` when axis=1 (:issue:`21134`) diff --git a/pandas/conftest.py b/pandas/conftest.py index b09cb872a12fb..0069d4278ba1e 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -149,3 +149,20 @@ def tz_aware_fixture(request): Fixture for trying explicit timezones: {0} """ return request.param + + +@pytest.fixture( + # params: Python 3.5 randomizes dict access and xdist doesn't like that + # in fixtures. In order to get predetermined values we need to sort + # the list deterministically + # GH 21123 + params=list(sorted(pd.core.base.SelectionMixin._cython_table.items(), + key=lambda x: x[0].__name__)), + ids=lambda x: "({}-{!r})".format(x[0].__name__, x[1]), +) +def cython_table_items(request): + """ + Fixture for returning the items in + pandas.core.base.SelectionMixin._cython_table + """ + return request.param diff --git a/pandas/core/base.py b/pandas/core/base.py index 26ff512538be2..874168f5a49c7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -331,13 +331,14 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs): raise ValueError("{arg} is an unknown string function".format(arg=arg)) - def _aggregate(self, arg, *args, **kwargs): + def _aggregate(self, arg, axis=0, *args, **kwargs): """ provide an implementation for the aggregators Parameters ---------- arg : string, dict, function + axis : int *args : args to pass on to the function **kwargs : kwargs to pass on to the function @@ -350,17 +351,18 @@ def _aggregate(self, arg, *args, **kwargs): how can be a string describe the required post-processing, or None if not required """ + obj = self if axis == 0 else self.T is_aggregator = lambda x: isinstance(x, (list, tuple, dict)) is_nested_renamer = False _axis = kwargs.pop('_axis', None) if _axis is None: - _axis = getattr(self, 'axis', 0) + _axis = getattr(obj, 'axis', 0) _level = kwargs.pop('_level', None) if isinstance(arg, compat.string_types): - return self._try_aggregate_string_function(arg, *args, - **kwargs), None + return obj._try_aggregate_string_function(arg, *args, + **kwargs), None if isinstance(arg, dict): @@ -368,7 +370,7 @@ def _aggregate(self, arg, *args, **kwargs): if _axis != 0: # pragma: no cover raise ValueError('Can only pass dict with axis=0') - obj = self._selected_obj + selected_obj = obj._selected_obj def nested_renaming_depr(level=4): # deprecation of nested renaming @@ -403,16 +405,16 @@ def nested_renaming_depr(level=4): if isinstance(v, dict): is_nested_renamer = True - if k not in obj.columns: + if k not in selected_obj.columns: msg = ('cannot perform renaming for {key} with a ' 'nested dictionary').format(key=k) raise SpecificationError(msg) nested_renaming_depr(4 + (_level or 0)) - elif isinstance(obj, ABCSeries): + elif isinstance(selected_obj, ABCSeries): nested_renaming_depr() - elif isinstance(obj, ABCDataFrame) and \ - k not in obj.columns: + elif isinstance(selected_obj, ABCDataFrame) and \ + k not in selected_obj.columns: raise KeyError( "Column '{col}' does not exist!".format(col=k)) @@ -422,8 +424,8 @@ def nested_renaming_depr(level=4): # deprecation of renaming keys # GH 15931 keys = list(compat.iterkeys(arg)) - if (isinstance(obj, ABCDataFrame) and - len(obj.columns.intersection(keys)) != len(keys)): + if (isinstance(selected_obj, ABCDataFrame) and len( + selected_obj.columns.intersection(keys)) != len(keys)): nested_renaming_depr() from pandas.core.reshape.concat import concat @@ -432,7 +434,7 @@ def _agg_1dim(name, how, subset=None): """ aggregate a 1-dim with how """ - colg = self._gotitem(name, ndim=1, subset=subset) + colg = obj._gotitem(name, ndim=1, subset=subset) if colg.ndim != 1: raise SpecificationError("nested dictionary is ambiguous " "in aggregation") @@ -442,8 +444,8 @@ def _agg_2dim(name, how): """ aggregate a 2-dim with how """ - colg = self._gotitem(self._selection, ndim=2, - subset=obj) + colg = obj._gotitem(obj._selection, ndim=2, + subset=selected_obj) return colg.aggregate(how, _level=None) def _agg(arg, func): @@ -473,20 +475,22 @@ def _agg(arg, func): else: - if self._selection is not None: + if obj._selection is not None: keys = None # some selection on the object - elif self._selection is not None: + elif obj._selection is not None: - sl = set(self._selection_list) + sl = set(obj._selection_list) # we are a Series like object, # but may have multiple aggregations if len(sl) == 1: - result = _agg(arg, lambda fname, - agg_how: _agg_1dim(self._selection, agg_how)) + result = _agg( + arg, + lambda fname, agg_how: _agg_1dim( + obj._selection, agg_how)) # we are selecting the same set as we are aggregating elif not len(sl - set(keys)): @@ -531,7 +535,7 @@ def is_any_frame(): return concat([result[k] for k in keys], keys=keys, axis=1), True - elif isinstance(self, ABCSeries) and is_any_series(): + elif isinstance(obj, ABCSeries) and is_any_series(): # we have a dict of Series # return a MI Series @@ -556,20 +560,20 @@ def is_any_frame(): # we have a dict of scalars result = Series(result, - name=getattr(self, 'name', None)) + name=getattr(obj, 'name', None)) return result, True elif is_list_like(arg) and arg not in compat.string_types: # we require a list, but not an 'str' - return self._aggregate_multiple_funcs(arg, - _level=_level, - _axis=_axis), None + return obj._aggregate_multiple_funcs(arg, + _level=_level, + _axis=_axis), None else: result = None - f = self._is_cython_func(arg) + f = obj._is_cython_func(arg) if f is not None: - return getattr(self, f)(*args, **kwargs), None + return getattr(obj, f)(*args, **kwargs), None # caller can react return result, True diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b6c33b4f79478..c515b13aaac82 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5818,13 +5818,11 @@ def _gotitem(self, def aggregate(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) - # TODO: flipped axis result = None - if axis == 0: - try: - result, how = self._aggregate(func, axis=0, *args, **kwargs) - except TypeError: - pass + try: + result, how = self._aggregate(func, axis=axis, *args, **kwargs) + except TypeError: + pass if result is None: return self.apply(func, axis=axis, args=args, **kwargs) return result diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index df7a5dc9dc173..616345dde2d2f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4086,7 +4086,10 @@ def _post_process_cython_aggregate(self, obj): def aggregate(self, arg, *args, **kwargs): _level = kwargs.pop('_level', None) - result, how = self._aggregate(arg, _level=_level, *args, **kwargs) + _agg_kwargs = kwargs.copy() + axis = _agg_kwargs.pop('axis', 0) + result, how = self._aggregate(arg, axis, _level=_level, + *args, **_agg_kwargs) if how is None: return result diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index dfb2961befe35..a67ed9f57c177 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1056,3 +1056,26 @@ def test_non_callable_aggregates(self): expected = df.size assert result == expected + + @pytest.mark.parametrize("df", [ + pd.DataFrame([[1, 2], [3, 4]]), + pd.DataFrame([[np.nan, 2], [3, 4]]), + pd.DataFrame(), + ]) + def test_agg_function_input(self, df, cython_table_items): + # test whether the functions (keys) in + # pd.core.base.SelectionMixin._cython_table give the same result + # as the related strings (values) when used in df.agg. Examples: + # - ``df.agg(np.nansum)`` should give the same result as + # ``df.agg('sum')`` + # - ``df.agg(sum)`` should give the same result as ``df.agg('sum')`` + # etc. + # GH21123 + np_func, str_func = cython_table_items + + tm.assert_almost_equal(df.agg(np_func), + df.agg(str_func), + ) + tm.assert_almost_equal(df.agg(np_func, axis=1), + df.agg(str_func, axis=1), + ) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index b28b9f342695f..e375e62b7f797 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -587,3 +587,23 @@ def test_map_missing_mixed(self, vals, mapping, exp): result = s.map(mapping) tm.assert_series_equal(result, pd.Series(exp)) + + @pytest.mark.parametrize("series", [ + pd.Series([1, 2, 3, 4]), + pd.Series([np.nan, 2, 3, 4]), + pd.Series(), + ]) + def test_agg_function_input(self, series, cython_table_items): + # test whether the functions (keys) in + # pd.core.base.SelectionMixin._cython_table give the same result + # as the related strings (values), when used in ser.agg. Examples: + # - ``ser.agg(np.nansum)`` should give the same result as + # ``ser.agg('sum')`` + # - ``ser.agg(sum)`` should give the same result as ``ser.agg('sum')`` + # etc. + # GH21123 + np_func, str_func = cython_table_items + + tm.assert_almost_equal(series.agg(np_func), + series.agg(str_func), + ) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index b59baf96f671f..a70ee80aee180 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -13,8 +13,7 @@ import pandas.core.nanops as nanops import pandas.util.testing as tm import pandas.util._test_decorators as td -from pandas.compat.numpy import (_np_version_under1p13, _np_version_under1p10, - _np_version_under1p12) +from pandas.compat.numpy import _np_version_under1p13 use_bn = nanops._USE_BOTTLENECK @@ -995,52 +994,6 @@ def prng(self): return np.random.RandomState(1234) -@pytest.fixture(params=[ - pd.Series([1, 2, 3, 4]), - pd.DataFrame([[1, 2], [3, 4]]), - pd.Series([np.nan, 2, 3, 4]), - pd.DataFrame([[np.nan, 2], [3, 4]]), - pd.Series(), - pd.DataFrame(), - pd.Series([np.nan]), - pd.DataFrame([[np.nan]]), -]) -def series_or_frame(request): - return request.param - - -@pytest.mark.parametrize("standard, nan_method", [ - (np.sum, np.nansum), - (np.mean, np.nanmean), - (np.std, np.nanstd), - (np.var, np.nanvar), - (np.median, np.nanmedian), - (np.max, np.nanmax), - (np.min, np.nanmin), -], ids=lambda x: x.__name__) -def test_np_nan_functions(standard, nan_method, series_or_frame): - tm.assert_almost_equal(series_or_frame.agg(standard), - series_or_frame.agg(nan_method), - check_exact=True) - - -@pytest.mark.skipif(_np_version_under1p10, reason="requires numpy>=1.10") -def test_np_nanprod(series_or_frame): - tm.assert_almost_equal(series_or_frame.agg(np.prod), - series_or_frame.agg(np.nanprod), - check_exact=True) - - -@pytest.mark.skipif(_np_version_under1p12, reason="requires numpy>=1.12") -def test_np_nancumprod(series_or_frame): - funcs = [(np.cumprod, np.nancumprod), - (np.cumsum, np.nancumsum)] - for standard, nan_method in funcs: - tm.assert_almost_equal(series_or_frame.agg(standard), - series_or_frame.agg(nan_method), - check_exact=True) - - def test_use_bottleneck(): if nanops._BOTTLENECK_INSTALLED: From f91b71635af6aedfbf337c4a30197680c9e13de6 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 21 May 2018 13:21:32 +0100 Subject: [PATCH 4/6] comments addressed for adding nan_funcs to _cython_table --- pandas/conftest.py | 2 +- pandas/tests/frame/test_apply.py | 15 ++++++++------- pandas/tests/series/test_apply.py | 7 ++++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 0069d4278ba1e..3eda078a802f4 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -158,7 +158,7 @@ def tz_aware_fixture(request): # GH 21123 params=list(sorted(pd.core.base.SelectionMixin._cython_table.items(), key=lambda x: x[0].__name__)), - ids=lambda x: "({}-{!r})".format(x[0].__name__, x[1]), + ids=lambda x: "({}-{!r})_fixture".format(x[0].__name__, x[1]), ) def cython_table_items(request): """ diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index a67ed9f57c177..14b1e7b3c243d 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1072,10 +1072,11 @@ def test_agg_function_input(self, df, cython_table_items): # etc. # GH21123 np_func, str_func = cython_table_items - - tm.assert_almost_equal(df.agg(np_func), - df.agg(str_func), - ) - tm.assert_almost_equal(df.agg(np_func, axis=1), - df.agg(str_func, axis=1), - ) + if str_func in ('cumprod', 'cumsum'): + tm.assert_frame_equal(df.agg(np_func), df.agg(str_func)) + tm.assert_frame_equal(df.agg(np_func, axis=1), + df.agg(str_func, axis=1)) + else: + tm.assert_series_equal(df.agg(np_func), df.agg(str_func)) + tm.assert_series_equal(df.agg(np_func, axis=1), + df.agg(str_func, axis=1)) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index e375e62b7f797..59e7851e4e92f 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -604,6 +604,7 @@ def test_agg_function_input(self, series, cython_table_items): # GH21123 np_func, str_func = cython_table_items - tm.assert_almost_equal(series.agg(np_func), - series.agg(str_func), - ) + if str_func in ('cumprod', 'cumsum'): + tm.assert_series_equal(series.agg(np_func), series.agg(str_func)) + else: + tm.assert_almost_equal(series.agg(np_func), series.agg(str_func)) From 396b32752299367df5ea59574c9c6297b93794cd Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 25 May 2018 20:50:18 +0100 Subject: [PATCH 5/6] new tests --- pandas/tests/frame/test_apply.py | 77 ++++++++++++++++++++++------- pandas/tests/series/test_apply.py | 82 +++++++++++++++++++++++-------- 2 files changed, 119 insertions(+), 40 deletions(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 14b1e7b3c243d..dd52efc973e8f 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1057,26 +1057,65 @@ def test_non_callable_aggregates(self): assert result == expected - @pytest.mark.parametrize("df", [ - pd.DataFrame([[1, 2], [3, 4]]), - pd.DataFrame([[np.nan, 2], [3, 4]]), - pd.DataFrame(), + @pytest.mark.parametrize("inputs", [ + [DataFrame(), { + 'sum': Series(), + 'max': Series(), + 'min': Series(), + 'all': Series(dtype=bool), + 'any': Series(dtype=bool), + 'mean': Series(), + 'prod': Series(), + 'std': Series(), + 'var': Series(), + 'median': Series(), + 'cumprod': DataFrame(), + 'cumsum': DataFrame(), + }], + [DataFrame([[np.nan, 1], [1, 2]]), { + 'sum': Series([1., 3]), + 'max': Series([1., 2]), + 'min': Series([1., 1]), + 'all': Series([True, True]), + 'any': Series([True, True]), + 'mean': Series([1, 1.5]), + 'prod': Series([1., 2]), + 'std': Series([np.nan, 0.707107]), + 'var': Series([np.nan, 0.5]), + 'median': Series([1, 1.5]), + 'cumprod': DataFrame([[np.nan, 1], [1., 2.]]), + 'cumsum': DataFrame([[np.nan, 1], [1., 3.]]), + }], + [DataFrame([['a', 'b'], ['b', 'a']]), { + 'sum': Series(['ab', 'ba']), + 'max': Series(['b', 'b']), + 'min': Series(['a', 'a']), + 'all': Series([True, True]), + 'any': Series([True, True]), + 'mean': Series([], index=pd.Index([], dtype='int64')), + 'prod': Series([], index=pd.Index([], dtype='int64')), + 'std': Series([], index=pd.Index([], dtype='int64')), + 'var': Series([], index=pd.Index([], dtype='int64')), + 'median': Series([], index=pd.Index([], dtype='int64')), + 'cumprod': TypeError, + 'cumsum': DataFrame([['a', 'b'], ['ab', 'ba']]), + }], ]) - def test_agg_function_input(self, df, cython_table_items): - # test whether the functions (keys) in - # pd.core.base.SelectionMixin._cython_table give the same result - # as the related strings (values) when used in df.agg. Examples: - # - ``df.agg(np.nansum)`` should give the same result as - # ``df.agg('sum')`` - # - ``df.agg(sum)`` should give the same result as ``df.agg('sum')`` - # etc. + @pytest.mark.parametrize("axis", [0, 1], ids=lambda x: "axis {}".format(x)) + def test_agg_function_input(self, cython_table_items, inputs, axis): # GH21123 np_func, str_func = cython_table_items - if str_func in ('cumprod', 'cumsum'): - tm.assert_frame_equal(df.agg(np_func), df.agg(str_func)) - tm.assert_frame_equal(df.agg(np_func, axis=1), - df.agg(str_func, axis=1)) + df = inputs[0] + expected = inputs[1][str_func] + + if isinstance(expected, type) and issubclass(expected, Exception): + with pytest.raises(expected): + # e.g. DataFrame(['a b'.split()]).cumprod() will raise + df.agg(np_func, axis=axis) + df.agg(str_func, axis=axis) + elif str_func in ('cumprod', 'cumsum'): + tm.assert_frame_equal(df.agg(np_func, axis=axis), expected) + tm.assert_frame_equal(df.agg(str_func, axis=axis), expected) else: - tm.assert_series_equal(df.agg(np_func), df.agg(str_func)) - tm.assert_series_equal(df.agg(np_func, axis=1), - df.agg(str_func, axis=1)) + tm.assert_series_equal(df.agg(np_func, axis=axis), expected) + tm.assert_series_equal(df.agg(str_func, axis=axis), expected) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 59e7851e4e92f..bb615c39003ef 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -331,6 +331,67 @@ def test_non_callable_aggregates(self): ('mean', 1.5)])) assert_series_equal(result[expected.index], expected) + @pytest.mark.parametrize("inputs", [ + [Series(), { + 'sum': 0, + 'max': np.nan, + 'min': np.nan, + 'all': True, + 'any': False, + 'mean': np.nan, + 'prod': 1, + 'std': np.nan, + 'var': np.nan, + 'median': np.nan, + 'cumprod': Series([], Index([])), + 'cumsum': Series([], Index([])), + }], + [Series([np.nan, 1, 2, 3]), { + 'sum': 6, + 'max': 3, + 'min': 1, + 'all': True, + 'any': True, + 'mean': 2, + 'prod': 6, + 'std': 1, + 'var': 1, + 'median': 2, + 'cumprod': Series([np.nan, 1, 2, 6]), + 'cumsum': Series([np.nan, 1, 3, 6]), + }], + [Series('a b c'.split()), { + 'sum': 'abc', + 'max': 'c', + 'min': 'a', + 'all': 'c', # see GH12863 + 'any': 'a', + 'mean': TypeError, # mean raises TypeError + 'prod': TypeError, + 'std': TypeError, + 'var': TypeError, + 'median': TypeError, + 'cumprod': TypeError, + 'cumsum': Series(['a', 'ab', 'abc']), + }], + ]) + def test_agg_function_input(self, inputs, cython_table_items): + # GH21123 + np_func, str_func = cython_table_items + series = inputs[0] + expected = inputs[1][str_func] + + if isinstance(expected, type) and issubclass(expected, Exception): + with pytest.raises(expected): + series.agg(np_func) + series.agg(str_func) + elif str_func in ('cumprod', 'cumsum'): + tm.assert_series_equal(series.agg(np_func), expected) + tm.assert_series_equal(series.agg(str_func), expected) + else: + tm.assert_almost_equal(series.agg(np_func), expected) + tm.assert_almost_equal(series.agg(str_func), expected) + class TestSeriesMap(TestData): @@ -587,24 +648,3 @@ def test_map_missing_mixed(self, vals, mapping, exp): result = s.map(mapping) tm.assert_series_equal(result, pd.Series(exp)) - - @pytest.mark.parametrize("series", [ - pd.Series([1, 2, 3, 4]), - pd.Series([np.nan, 2, 3, 4]), - pd.Series(), - ]) - def test_agg_function_input(self, series, cython_table_items): - # test whether the functions (keys) in - # pd.core.base.SelectionMixin._cython_table give the same result - # as the related strings (values), when used in ser.agg. Examples: - # - ``ser.agg(np.nansum)`` should give the same result as - # ``ser.agg('sum')`` - # - ``ser.agg(sum)`` should give the same result as ``ser.agg('sum')`` - # etc. - # GH21123 - np_func, str_func = cython_table_items - - if str_func in ('cumprod', 'cumsum'): - tm.assert_series_equal(series.agg(np_func), series.agg(str_func)) - else: - tm.assert_almost_equal(series.agg(np_func), series.agg(str_func)) From 580edcf3e19636bb72faa7d1d6857da6dc97b2a7 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 26 May 2018 03:00:58 +0100 Subject: [PATCH 6/6] changed tests --- pandas/tests/frame/test_apply.py | 15 ++++++++++----- pandas/tests/series/test_apply.py | 18 +++++++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index dd52efc973e8f..e3c87917ec89f 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1112,10 +1112,15 @@ def test_agg_function_input(self, cython_table_items, inputs, axis): with pytest.raises(expected): # e.g. DataFrame(['a b'.split()]).cumprod() will raise df.agg(np_func, axis=axis) + with pytest.raises(expected): df.agg(str_func, axis=axis) - elif str_func in ('cumprod', 'cumsum'): - tm.assert_frame_equal(df.agg(np_func, axis=axis), expected) - tm.assert_frame_equal(df.agg(str_func, axis=axis), expected) + return + + result = df.agg(np_func, axis=axis) + result_str_func = df.agg(str_func, axis=axis) + if str_func in ('cumprod', 'cumsum'): + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result_str_func, expected) else: - tm.assert_series_equal(df.agg(np_func, axis=axis), expected) - tm.assert_series_equal(df.agg(str_func, axis=axis), expected) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result_str_func, expected) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index bb615c39003ef..662a411c6fbd3 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -384,13 +384,21 @@ def test_agg_function_input(self, inputs, cython_table_items): if isinstance(expected, type) and issubclass(expected, Exception): with pytest.raises(expected): series.agg(np_func) + with pytest.raises(expected): series.agg(str_func) - elif str_func in ('cumprod', 'cumsum'): - tm.assert_series_equal(series.agg(np_func), expected) - tm.assert_series_equal(series.agg(str_func), expected) + return + + result = series.agg(np_func) + result_str_func = series.agg(str_func) + if str_func in ('cumprod', 'cumsum'): + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result_str_func, expected) + elif tm.is_number(expected): + assert np.isclose(result, expected, equal_nan=True) + assert np.isclose(result_str_func, expected, equal_nan=True) else: - tm.assert_almost_equal(series.agg(np_func), expected) - tm.assert_almost_equal(series.agg(str_func), expected) + assert result == expected + assert result_str_func == expected class TestSeriesMap(TestData):