From 3d95bfc4e9372d5a5df8e51dbd51e4513cf2931c Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 18 May 2018 19:41:38 +0100 Subject: [PATCH 1/7] Fix bug where df.agg(..., axis=1) gives wrong result --- pandas/core/frame.py | 15 ++++--- pandas/tests/frame/test_apply.py | 72 ++++++++++++++++++++++++++++++- pandas/tests/series/test_apply.py | 72 +++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16332738ce610..724e661d9f319 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6070,17 +6070,20 @@ def _gotitem(self, def aggregate(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) - # TODO: flipped axis result = None - if axis == 0: - try: - result, how = self._aggregate(func, axis=0, *args, **kwargs) - except TypeError: - pass + try: + result, how = self._aggregate(func, axis=axis, *args, **kwargs) + except TypeError: + pass if result is None: return self.apply(func, axis=axis, args=args, **kwargs) return result + @Appender(NDFrame._aggregate.__doc__, indents=2) + def _aggregate(self, arg, axis=0, *args, **kwargs): + obj = self.T if axis == 1 else self + return super(DataFrame, obj)._aggregate(arg, *args, **kwargs) + agg = aggregate def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index dfb2961befe35..0cca29a6437a2 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -946,7 +946,7 @@ def test_agg_dict_nested_renaming_depr(self): df = pd.DataFrame({'A': range(5), 'B': 5}) # nested renaming - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): df.agg({'A': {'foo': 'min'}, 'B': {'bar': 'max'}}) @@ -1056,3 +1056,73 @@ def test_non_callable_aggregates(self): expected = df.size assert result == expected + + @pytest.mark.parametrize("frame, 
expected_dict", [ + [DataFrame(), { + 'sum': Series(), + 'max': Series(), + 'min': Series(), + 'all': Series(dtype=bool), + 'any': Series(dtype=bool), + 'mean': Series(), + 'prod': Series(), + 'std': Series(), + 'var': Series(), + 'median': Series(), + 'cumprod': DataFrame(), + 'cumsum': DataFrame(), + }], + [DataFrame([[np.nan, 1], [1, 2]]), { + 'sum': Series([1., 3]), + 'max': Series([1., 2]), + 'min': Series([1., 1]), + 'all': Series([True, True]), + 'any': Series([True, True]), + 'mean': Series([1, 1.5]), + 'prod': Series([1., 2]), + 'std': Series([np.nan, 0.707107]), + 'var': Series([np.nan, 0.5]), + 'median': Series([1, 1.5]), + 'cumprod': DataFrame([[np.nan, 1], [1., 2.]]), + 'cumsum': DataFrame([[np.nan, 1], [1., 3.]]), + }], + [DataFrame([['a', 'b'], ['b', 'a']]), { + 'sum': Series(['ab', 'ba']), + 'max': Series(['b', 'b']), + 'min': Series(['a', 'a']), + 'all': Series([True, True]), + 'any': Series([True, True]), + 'mean': Series([], index=pd.Index([], dtype='int64')), + 'prod': Series([], index=pd.Index([], dtype='int64')), + 'std': Series([], index=pd.Index([], dtype='int64')), + 'var': Series([], index=pd.Index([], dtype='int64')), + 'median': Series([], index=pd.Index([], dtype='int64')), + 'cumprod': TypeError, + 'cumsum': DataFrame([['a', 'b'], ['ab', 'ba']]), + }], + ]) + @pytest.mark.parametrize("axis", [0, 1], ids=lambda x: "axis {}".format(x)) + def test_agg_cython_table(self, cython_table_items, + frame, expected_dict, axis): + # GH21224 + # test if using items in pandas.core.base.SelectionMixin._cython_table + # in agg gives correct results + np_func, str_func = cython_table_items + expected = expected_dict[str_func] + + if isinstance(expected, type) and issubclass(expected, Exception): + with pytest.raises(expected): + # e.g. 
DataFrame(['a b'.split()]).cumprod() will raise + frame.agg(np_func, axis=axis) + with pytest.raises(expected): + frame.agg(str_func, axis=axis) + return + + result = frame.agg(np_func, axis=axis) + result_str_func = frame.agg(str_func, axis=axis) + if str_func in ('cumprod', 'cumsum'): + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result_str_func, expected) + else: + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result_str_func, expected) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index b28b9f342695f..24cce6e347121 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -331,6 +331,78 @@ def test_non_callable_aggregates(self): ('mean', 1.5)])) assert_series_equal(result[expected.index], expected) + @pytest.mark.parametrize("series, expected_dict", [ + [Series(), { + 'sum': 0, + 'max': np.nan, + 'min': np.nan, + 'all': True, + 'any': False, + 'mean': np.nan, + 'prod': 1, + 'std': np.nan, + 'var': np.nan, + 'median': np.nan, + 'cumprod': Series([], Index([])), + 'cumsum': Series([], Index([])), + }], + [Series([np.nan, 1, 2, 3]), { + 'sum': 6, + 'max': 3, + 'min': 1, + 'all': True, + 'any': True, + 'mean': 2, + 'prod': 6, + 'std': 1, + 'var': 1, + 'median': 2, + 'cumprod': Series([np.nan, 1, 2, 6]), + 'cumsum': Series([np.nan, 1, 3, 6]), + }], + [Series('a b c'.split()), { + 'sum': 'abc', + 'max': 'c', + 'min': 'a', + 'all': 'c', # see GH12863 + 'any': 'a', + 'mean': TypeError, # mean raises TypeError + 'prod': TypeError, + 'std': TypeError, + 'var': TypeError, + 'median': TypeError, + 'cumprod': TypeError, + 'cumsum': Series(['a', 'ab', 'abc']), + }], + ]) + def test_agg_cython_table(self, cython_table_items, + series, expected_dict): + # GH21224 + # test if using items in pandas.core.base.SelectionMixin._cython_table + # in agg gives correct results + np_func, str_func = cython_table_items + expected = expected_dict[str_func] + + if 
isinstance(expected, type) and issubclass(expected, Exception): + with pytest.raises(expected): + # e.g. Series('a b'.split()).cumprod() will raise + series.agg(np_func) + with pytest.raises(expected): + series.agg(str_func) + return + + result = series.agg(np_func) + result_str_func = series.agg(str_func) + if str_func in ('cumprod', 'cumsum'): + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result_str_func, expected) + elif tm.is_number(expected): + assert np.isclose(result, expected, equal_nan=True) + assert np.isclose(result_str_func, expected, equal_nan=True) + else: + assert result == expected + assert result_str_func == expected + class TestSeriesMap(TestData): From 262bd3e58ab12c259f4311caa3292332204674de Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 30 May 2018 20:38:49 +0100 Subject: [PATCH 2/7] Fix tests for bug where df.agg(..., axis=1) gives wrong result --- pandas/core/frame.py | 1 - pandas/tests/frame/test_apply.py | 224 +++++++++++++++++------------- pandas/tests/series/test_apply.py | 174 ++++++++++++++--------- 3 files changed, 237 insertions(+), 162 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 724e661d9f319..93603debf649c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6079,7 +6079,6 @@ def aggregate(self, func, axis=0, *args, **kwargs): return self.apply(func, axis=axis, args=args, **kwargs) return result - @Appender(NDFrame._aggregate.__doc__, indents=2) def _aggregate(self, arg, axis=0, *args, **kwargs): obj = self.T if axis == 1 else self return super(DataFrame, obj)._aggregate(arg, *args, **kwargs) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 0cca29a6437a2..7a6c9fb93fff0 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -6,6 +6,7 @@ import operator from datetime import datetime +from itertools import chain import warnings import numpy as np @@ -21,6 +22,38 @@ from pandas.tests.frame.common import 
TestData +def _get_cython_table_params(frame, func_names_and_expected): + """combine frame, functions from SelectionMixin._cython_table + keys and expected result. + + Parameters + ---------- + frame : DataFrame + A symmetrical DataFrame + func_names_and_expected : Sequence of two items + The first item is a name of a NDFrame method ('sum', 'prod') etc. + The second item is the expected return value + + Returns + ------- + results : list + List of three items (DataFrame, function, expected result) + """ + table = pd.core.base.SelectionMixin._cython_table + if compat.PY36: + table = list(table.items()) + else: # dicts have random order in Python<3.6, which xdist doesn't like + table = sorted(((key, value) for key, value in table.items()), + key=lambda x: x[0].__class__.__name__) + results = [] + for func_name, expected in func_names_and_expected: + results.append((frame, func_name, expected)) + results += [ + (frame, func, expected) for func, name in table + if name == func_name] + return results + + class TestDataFrameApply(TestData): def test_apply(self): @@ -867,27 +900,27 @@ def test_agg_transform(self): result = self.frame.transform(['sqrt', np.abs]) assert_frame_equal(result, expected) - def test_transform_and_agg_err(self): + def test_transform_and_agg_err(self, axis): # cannot both transform and agg def f(): - self.frame.transform(['max', 'min']) + self.frame.transform(['max', 'min'], axis=axis) pytest.raises(ValueError, f) def f(): with np.errstate(all='ignore'): - self.frame.agg(['max', 'sqrt']) + self.frame.agg(['max', 'sqrt'], axis=axis) pytest.raises(ValueError, f) def f(): with np.errstate(all='ignore'): - self.frame.transform(['max', 'sqrt']) + self.frame.transform(['max', 'sqrt'], axis=axis) pytest.raises(ValueError, f) df = pd.DataFrame({'A': range(5), 'B': 5}) def f(): with np.errstate(all='ignore'): - df.agg({'A': ['abs', 'sum'], 'B': ['mean', 'max']}) + df.agg({'A': ['abs', 'sum'], 'B': ['mean', 'max']}, axis=axis) 
@pytest.mark.parametrize('method', [ 'abs', 'shift', 'pct_change', 'cumsum', 'rank', @@ -950,38 +983,47 @@ def test_agg_dict_nested_renaming_depr(self): df.agg({'A': {'foo': 'min'}, 'B': {'bar': 'max'}}) - def test_agg_reduce(self): + def test_agg_reduce(self, axis): + other_axis = abs(axis - 1) + name1, name2 = self.frame.axes[other_axis].unique()[:2] + # all reducers - expected = zip_frames(self.frame.mean().to_frame(), - self.frame.max().to_frame(), - self.frame.sum().to_frame()).T + expected = zip_frames(self.frame.mean(axis=axis).to_frame(), + self.frame.max(axis=axis).to_frame(), + self.frame.sum(axis=axis).to_frame()).T expected.index = ['mean', 'max', 'sum'] - result = self.frame.agg(['mean', 'max', 'sum']) + result = self.frame.agg(['mean', 'max', 'sum'], axis=axis) assert_frame_equal(result, expected) # dict input with scalars - result = self.frame.agg({'A': 'mean', 'B': 'sum'}) - expected = Series([self.frame.A.mean(), self.frame.B.sum()], - index=['A', 'B']) + func = {name1: 'mean', name2: 'sum'} + result = self.frame.agg(func, axis=axis) + expected = Series([self.frame.loc(other_axis)[name1].mean(), + self.frame.loc(other_axis)[name2].sum()], + index=[name1, name2]) assert_series_equal(result.reindex_like(expected), expected) # dict input with lists - result = self.frame.agg({'A': ['mean'], 'B': ['sum']}) - expected = DataFrame({'A': Series([self.frame.A.mean()], - index=['mean']), - 'B': Series([self.frame.B.sum()], - index=['sum'])}) + func = {name1: ['mean'], name2: ['sum']} + result = self.frame.agg(func, axis=axis) + expected = DataFrame({ + name1: Series([self.frame.loc(other_axis)[name1].mean()], + index=['mean']), + name2: Series([self.frame.loc(other_axis)[name2].sum()], + index=['sum'])}) assert_frame_equal(result.reindex_like(expected), expected) # dict input with lists with multiple - result = self.frame.agg({'A': ['mean', 'sum'], - 'B': ['sum', 'max']}) - expected = DataFrame({'A': Series([self.frame.A.mean(), - self.frame.A.sum()], - 
index=['mean', 'sum']), - 'B': Series([self.frame.B.sum(), - self.frame.B.max()], - index=['sum', 'max'])}) + func = {name1: ['mean', 'sum'], + name2: ['sum', 'max']} + result = self.frame.agg(func, axis=axis) + expected = DataFrame({ + name1: Series([self.frame.loc(other_axis)[name1].mean(), + self.frame.loc(other_axis)[name1].sum()], + index=['mean', 'sum']), + name2: Series([self.frame.loc(other_axis)[name2].sum(), + self.frame.loc(other_axis)[name2].max()], + index=['sum', 'max'])}) assert_frame_equal(result.reindex_like(expected), expected) def test_nuiscance_columns(self): @@ -1057,72 +1099,66 @@ def test_non_callable_aggregates(self): assert result == expected - @pytest.mark.parametrize("frame, expected_dict", [ - [DataFrame(), { - 'sum': Series(), - 'max': Series(), - 'min': Series(), - 'all': Series(dtype=bool), - 'any': Series(dtype=bool), - 'mean': Series(), - 'prod': Series(), - 'std': Series(), - 'var': Series(), - 'median': Series(), - 'cumprod': DataFrame(), - 'cumsum': DataFrame(), - }], - [DataFrame([[np.nan, 1], [1, 2]]), { - 'sum': Series([1., 3]), - 'max': Series([1., 2]), - 'min': Series([1., 1]), - 'all': Series([True, True]), - 'any': Series([True, True]), - 'mean': Series([1, 1.5]), - 'prod': Series([1., 2]), - 'std': Series([np.nan, 0.707107]), - 'var': Series([np.nan, 0.5]), - 'median': Series([1, 1.5]), - 'cumprod': DataFrame([[np.nan, 1], [1., 2.]]), - 'cumsum': DataFrame([[np.nan, 1], [1., 3.]]), - }], - [DataFrame([['a', 'b'], ['b', 'a']]), { - 'sum': Series(['ab', 'ba']), - 'max': Series(['b', 'b']), - 'min': Series(['a', 'a']), - 'all': Series([True, True]), - 'any': Series([True, True]), - 'mean': Series([], index=pd.Index([], dtype='int64')), - 'prod': Series([], index=pd.Index([], dtype='int64')), - 'std': Series([], index=pd.Index([], dtype='int64')), - 'var': Series([], index=pd.Index([], dtype='int64')), - 'median': Series([], index=pd.Index([], dtype='int64')), - 'cumprod': TypeError, - 'cumsum': DataFrame([['a', 'b'], ['ab', 
'ba']]), - }], - ]) - @pytest.mark.parametrize("axis", [0, 1], ids=lambda x: "axis {}".format(x)) - def test_agg_cython_table(self, cython_table_items, - frame, expected_dict, axis): + @pytest.mark.parametrize("df, func, expected", chain( + _get_cython_table_params( + DataFrame(), [ + ('sum', Series()), + ('max', Series()), + ('min', Series()), + ('all', Series(dtype=bool)), + ('any', Series(dtype=bool)), + ('mean', Series()), + ('prod', Series()), + ('std', Series()), + ('var', Series()), + ('median', Series()), + ]), + _get_cython_table_params( + DataFrame([[np.nan, 1], [1, 2]]), [ + ('sum', Series([1., 3])), + ('max', Series([1., 2])), + ('min', Series([1., 1])), + ('all', Series([True, True])), + ('any', Series([True, True])), + ('mean', Series([1, 1.5])), + ('prod', Series([1., 2])), + ('std', Series([np.nan, 0.707107])), + ('var', Series([np.nan, 0.5])), + ('median', Series([1, 1.5])), + ]), + )) + def test_agg_cython_table(self, df, func, expected, axis): # GH21224 - # test if using items in pandas.core.base.SelectionMixin._cython_table - # in agg gives correct results - np_func, str_func = cython_table_items - expected = expected_dict[str_func] - - if isinstance(expected, type) and issubclass(expected, Exception): - with pytest.raises(expected): - # e.g. 
DataFrame(['a b'.split()]).cumprod() will raise - frame.agg(np_func, axis=axis) - with pytest.raises(expected): - frame.agg(str_func, axis=axis) - return - - result = frame.agg(np_func, axis=axis) - result_str_func = frame.agg(str_func, axis=axis) - if str_func in ('cumprod', 'cumsum'): - tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result_str_func, expected) - else: - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result_str_func, expected) + # test reducing functions in + # pandas.core.base.SelectionMixin._cython_table + result = df.agg(func, axis=axis) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("df, func, expected", chain( + _get_cython_table_params( + DataFrame(), [ + ('cumprod', DataFrame()), + ('cumsum', DataFrame()), + ]), + _get_cython_table_params( + DataFrame([[np.nan, 1], [1, 2]]), [ + ('cumprod', DataFrame([[np.nan, 1], [1., 2.]])), + ('cumsum', DataFrame([[np.nan, 1], [1., 3.]])), + ]), + )) + def test_agg_cython_table_transform(self, df, func, expected, axis): + # GH21224 + # test transforming functions in + # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + result = df.agg(func, axis=axis) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("df, func, expected", _get_cython_table_params( + DataFrame([['a', 'b'], ['b', 'a']]), [ + ['cumprod', TypeError], + ]), + ) + def test_agg_cython_table_raises(self, df, func, expected, axis): + # GH21224 + with pytest.raises(expected): + df.agg(func, axis=axis) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 24cce6e347121..e85a8732092c9 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -4,6 +4,7 @@ import pytest from collections import Counter, defaultdict, OrderedDict +from itertools import chain import numpy as np import pandas as pd @@ -17,6 +18,38 @@ from .common import TestData +def _get_cython_table_params(series, 
func_names_and_expected): + """combine series, functions from SelectionMixin._cython_table + keys and expected result. + + Parameters + ---------- + series : Series + A Series + func_names_and_expected : Sequence of two items + The first item is a name of a NDFrame method ('sum', 'prod') etc. + The second item is the expected return value + + Returns + ------- + results : list + List of three items (Series, function, expected result) + """ + table = pd.core.base.SelectionMixin._cython_table + if compat.PY36: + table = list(table.items()) + else: # dicts have random order in Python<3.6, which xdist doesn't like + table = sorted(((key, value) for key, value in table.items()), + key=lambda x: x[0].__class__.__name__) + results = [] + for func_name, expected in func_names_and_expected: + results.append((series, func_name, expected)) + results += [ + (series, func, expected) for func, name in table + if name == func_name] + return results + + class TestSeriesApply(TestData): def test_apply(self): @@ -331,77 +364,84 @@ def test_non_callable_aggregates(self): ('mean', 1.5)])) assert_series_equal(result[expected.index], expected) - @pytest.mark.parametrize("series, expected_dict", [ - [Series(), { - 'sum': 0, - 'max': np.nan, - 'min': np.nan, - 'all': True, - 'any': False, - 'mean': np.nan, - 'prod': 1, - 'std': np.nan, - 'var': np.nan, - 'median': np.nan, - 'cumprod': Series([], Index([])), - 'cumsum': Series([], Index([])), - }], - [Series([np.nan, 1, 2, 3]), { - 'sum': 6, - 'max': 3, - 'min': 1, - 'all': True, - 'any': True, - 'mean': 2, - 'prod': 6, - 'std': 1, - 'var': 1, - 'median': 2, - 'cumprod': Series([np.nan, 1, 2, 6]), - 'cumsum': Series([np.nan, 1, 3, 6]), - }], - [Series('a b c'.split()), { - 'sum': 'abc', - 'max': 'c', - 'min': 'a', - 'all': 'c', # see GH12863 - 'any': 'a', - 'mean': TypeError, # mean raises TypeError - 'prod': TypeError, - 'std': TypeError, - 'var': TypeError, - 'median': TypeError, - 'cumprod': TypeError, - 'cumsum': Series(['a', 'ab', 
'abc']), - }], - ]) - def test_agg_cython_table(self, cython_table_items, - series, expected_dict): + @pytest.mark.parametrize("series, func, expected", chain( + _get_cython_table_params(Series(), [ + ('sum', 0), + ('max', np.nan), + ('min', np.nan), + ('all', True), + ('any', False), + ('mean', np.nan), + ('prod', 1), + ('std', np.nan), + ('var', np.nan), + ('median', np.nan), + ]), + _get_cython_table_params(Series([np.nan, 1, 2, 3]), [ + ('sum', 6), + ('max', 3), + ('min', 1), + ('all', True), + ('any', True), + ('mean', 2), + ('prod', 6), + ('std', 1), + ('var', 1), + ('median', 2), + ]), + _get_cython_table_params(Series('a b c'.split()), [ + ('sum', 'abc'), + ('max', 'c'), + ('min', 'a'), + ('all', 'c'), # see GH12863 + ('any', 'a'), + ]), + )) + def test_agg_cython_table(self, series, func, expected): # GH21224 - # test if using items in pandas.core.base.SelectionMixin._cython_table - # in agg gives correct results - np_func, str_func = cython_table_items - expected = expected_dict[str_func] - - if isinstance(expected, type) and issubclass(expected, Exception): - with pytest.raises(expected): - # e.g. 
Series('a b'.split()).cumprod() will raise - series.agg(np_func) - with pytest.raises(expected): - series.agg(str_func) - return - - result = series.agg(np_func) - result_str_func = series.agg(str_func) - if str_func in ('cumprod', 'cumsum'): - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result_str_func, expected) - elif tm.is_number(expected): + # test reducing functions in + # pandas.core.base.SelectionMixin._cython_table + result = series.agg(func) + if tm.is_number(expected): assert np.isclose(result, expected, equal_nan=True) - assert np.isclose(result_str_func, expected, equal_nan=True) else: assert result == expected - assert result_str_func == expected + + @pytest.mark.parametrize("series, func, expected", chain( + _get_cython_table_params(Series(), [ + ('cumprod', Series([], Index([]))), + ('cumsum', Series([], Index([]))), + ]), + _get_cython_table_params(Series([np.nan, 1, 2, 3]), [ + ('cumprod', Series([np.nan, 1, 2, 6])), + ('cumsum', Series([np.nan, 1, 3, 6])), + ]), + _get_cython_table_params(Series('a b c'.split()), [ + ('cumsum', Series(['a', 'ab', 'abc'])), + ]), + )) + def test_agg_cython_table_transform(self, series, func, expected): + # GH21224 + # test transforming functions in + # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + result = series.agg(func) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("series, func, expected", chain( + _get_cython_table_params(Series('a b c'.split()), [ + ('mean', TypeError), # mean raises TypeError + ('prod', TypeError), + ('std', TypeError), + ('var', TypeError), + ('median', TypeError), + ('cumprod', TypeError), + ]) + )) + def test_agg_cython_table_raises(self, series, func, expected): + # GH21224 + with pytest.raises(expected): + # e.g. 
Series('a b'.split()).cumprod() will raise + series.agg(func) class TestSeriesMap(TestData): From ed43757fb23c1c6b1fbe2a5be5a72dd370405469 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 23 Jun 2018 08:32:45 +0100 Subject: [PATCH 3/7] changed according to comments --- pandas/conftest.py | 17 +++++++++++++++++ .../tests/generic/test_label_or_level_utils.py | 7 ------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index a979c3fc3bfac..98faeee6b09c3 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -60,6 +60,23 @@ def spmatrix(request): return getattr(sparse, request.param + '_matrix') +@pytest.fixture(params=[0, 1], + ids=lambda x: "axis {}".format(x)) +def axis(request): + """ + Fixture for returning the axis numbers of a dataframe. + """ + return request.param + + +@pytest.fixture(params=[0], ids=lambda x: "axis {}".format(x)) +def axis_series(request): + """ + Fixture for returning the axis numbers of a series. + """ + return request.param + + @pytest.fixture def ip(): """ diff --git a/pandas/tests/generic/test_label_or_level_utils.py b/pandas/tests/generic/test_label_or_level_utils.py index 8b133e654a869..ca1c43c8d0f7e 100644 --- a/pandas/tests/generic/test_label_or_level_utils.py +++ b/pandas/tests/generic/test_label_or_level_utils.py @@ -76,7 +76,6 @@ def assert_level_reference(frame, levels, axis): # DataFrame # --------- -@pytest.mark.parametrize('axis', [0, 1]) def test_is_level_or_label_reference_df_simple(df_levels, axis): # Compute expected labels and levels @@ -91,7 +90,6 @@ def test_is_level_or_label_reference_df_simple(df_levels, axis): assert_label_reference(df_levels, expected_labels, axis=axis) -@pytest.mark.parametrize('axis', [0, 1]) def test_is_level_reference_df_ambig(df_ambig, axis): # Transpose frame if axis == 1 @@ -165,7 +163,6 @@ def test_is_label_or_level_reference_panel_error(panel): # DataFrame # --------- -@pytest.mark.parametrize('axis', [0, 1]) def 
test_check_label_or_level_ambiguity_df(df_ambig, axis): # Transpose frame if axis == 1 @@ -264,7 +261,6 @@ def assert_level_values(frame, levels, axis): # DataFrame # --------- -@pytest.mark.parametrize('axis', [0, 1]) def test_get_label_or_level_values_df_simple(df_levels, axis): # Compute expected labels and levels @@ -279,7 +275,6 @@ def test_get_label_or_level_values_df_simple(df_levels, axis): assert_level_values(df_levels, expected_levels, axis=axis) -@pytest.mark.parametrize('axis', [0, 1]) def test_get_label_or_level_values_df_ambig(df_ambig, axis): # Transpose frame if axis == 1 @@ -300,7 +295,6 @@ def test_get_label_or_level_values_df_ambig(df_ambig, axis): assert_label_values(df_ambig, ['L3'], axis=axis) -@pytest.mark.parametrize('axis', [0, 1]) def test_get_label_or_level_values_df_duplabels(df_duplabels, axis): # Transpose frame if axis == 1 @@ -383,7 +377,6 @@ def assert_levels_dropped(frame, levels, axis): # DataFrame # --------- -@pytest.mark.parametrize('axis', [0, 1]) def test_drop_labels_or_levels_df(df_levels, axis): # Compute expected labels and levels From 2be37475ffa04e08d9901b493378813fd851b1df Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 16 Jul 2018 14:55:01 +0100 Subject: [PATCH 4/7] correct apply(axis=1) and related bugs --- doc/source/whatsnew/v0.24.0.txt | 6 +- pandas/core/apply.py | 14 ++--- pandas/core/frame.py | 13 ++++- pandas/core/generic.py | 16 +++--- pandas/tests/frame/test_apply.py | 98 +++++++++++++++++++------------- 5 files changed, 88 insertions(+), 59 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3b04d9937d7f2..38f1ac4193947 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -475,7 +475,11 @@ Numeric - Bug in :class:`Series` ``__rmatmul__`` doesn't support matrix vector multiplication (:issue:`21530`) - Bug in :func:`factorize` fails with read-only array (:issue:`12813`) - Fixed bug in :func:`unique` handled signed zeros inconsistently: 
for some inputs 0.0 and -0.0 were treated as equal and for some inputs as different. Now they are treated as equal for all inputs (:issue:`21866`) -- +- Bug in :meth:`DataFrame.agg`, :meth:`DataFrame.transform` and :meth:`DataFrame.apply` when ``axis=1``. + Using ``apply`` with a list of functions and axis=1 (e.g. ``df.apply(['abs'], axis=1)``) + previously gave a TypeError. This fixes that issue. + As ``agg`` and ``transform`` in some cases delegate to ``apply``, this also + fixed this issue for them (:issue:`16679`). - Strings diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 27ac5038276d6..77901bda75ce4 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -105,6 +105,11 @@ def agg_axis(self): def get_result(self): """ compute the results """ + # dispatch to agg + if isinstance(self.f, (list, dict)): + return self.obj.aggregate(self.f, axis=self.axis, + *self.args, **self.kwds) + # all empty if len(self.columns) == 0 and len(self.index) == 0: return self.apply_empty_result() @@ -308,15 +313,6 @@ def wrap_results(self): class FrameRowApply(FrameApply): axis = 0 - def get_result(self): - - # dispatch to agg - if isinstance(self.f, (list, dict)): - return self.obj.aggregate(self.f, axis=self.axis, - *self.args, **self.kwds) - - return super(FrameRowApply, self).get_result() - def apply_broadcast(self): return super(FrameRowApply, self).apply_broadcast(self.obj) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 93603debf649c..401982f07d66c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6080,11 +6080,20 @@ def aggregate(self, func, axis=0, *args, **kwargs): return result def _aggregate(self, arg, axis=0, *args, **kwargs): - obj = self.T if axis == 1 else self - return super(DataFrame, obj)._aggregate(arg, *args, **kwargs) + if axis == 1: + result, how = (super(DataFrame, self.T) + ._aggregate(arg, *args, **kwargs)) + result = result.T if result is not None else result + return result, how + return super(DataFrame, 
self)._aggregate(arg, *args, **kwargs) agg = aggregate + def transform(self, func, axis=0, *args, **kwargs): + if axis == 1: + return super(DataFrame, self.T).transform(func, *args, **kwargs).T + return super(DataFrame, self).transform(func, *args, **kwargs) + def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, result_type=None, args=(), **kwds): """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 16105014bf74e..1126500fa55b2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9193,16 +9193,14 @@ def ewm(self, com=None, span=None, halflife=None, alpha=None, cls.ewm = ewm - @Appender(_shared_docs['transform'] % _shared_doc_kwargs) - def transform(self, func, *args, **kwargs): - result = self.agg(func, *args, **kwargs) - if is_scalar(result) or len(result) != len(self): - raise ValueError("transforms cannot produce " - "aggregated results") + @Appender(_shared_docs['transform'] % _shared_doc_kwargs) + def transform(self, func, *args, **kwargs): + result = self.agg(func, *args, **kwargs) + if is_scalar(result) or len(result) != len(self): + raise ValueError("transforms cannot produce " + "aggregated results") - return result - - cls.transform = transform + return result # ---------------------------------------------------------------------- # Misc methods diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 7a6c9fb93fff0..bb7c99076c7ce 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -5,6 +5,7 @@ import pytest import operator +from collections import OrderedDict from datetime import datetime from itertools import chain @@ -846,58 +847,74 @@ def test_consistency_for_boxed(self, box): assert_frame_equal(result, expected) -def zip_frames(*frames): +def zip_frames(frames, axis=1): """ - take a list of frames, zip the columns together for each - assume that these all have the first frame columns + take a list of frames, zip them together under the 
+ assumption that these all have the first frames' index/columns. - return a new frame + Returns + ------- + new_frame : DataFrame """ - columns = frames[0].columns - zipped = [f[c] for c in columns for f in frames] - return pd.concat(zipped, axis=1) + if axis == 1: + columns = frames[0].columns + zipped = [f.loc[:, c] for c in columns for f in frames] + return pd.concat(zipped, axis=1) + else: + index = frames[0].index + zipped = [f.loc[i, :] for i in index for f in frames] + return pd.DataFrame(zipped) class TestDataFrameAggregate(TestData): - def test_agg_transform(self): + def test_agg_transform(self, axis): + other_axis = abs(axis - 1) with np.errstate(all='ignore'): - f_sqrt = np.sqrt(self.frame) f_abs = np.abs(self.frame) + f_sqrt = np.sqrt(self.frame) # ufunc - result = self.frame.transform(np.sqrt) + result = self.frame.transform(np.sqrt, axis=axis) expected = f_sqrt.copy() assert_frame_equal(result, expected) - result = self.frame.apply(np.sqrt) + result = self.frame.apply(np.sqrt, axis=axis) assert_frame_equal(result, expected) - result = self.frame.transform(np.sqrt) + result = self.frame.transform(np.sqrt, axis=axis) assert_frame_equal(result, expected) # list-like - result = self.frame.apply([np.sqrt]) + result = self.frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() - expected.columns = pd.MultiIndex.from_product( - [self.frame.columns, ['sqrt']]) + if axis == 0: + expected.columns = pd.MultiIndex.from_product( + [self.frame.columns, ['sqrt']]) + else: + expected.index = pd.MultiIndex.from_product( + [self.frame.index, ['sqrt']]) assert_frame_equal(result, expected) - result = self.frame.transform([np.sqrt]) + result = self.frame.transform([np.sqrt], axis=axis) assert_frame_equal(result, expected) # multiple items in list # these are in the order as if we are applying both # functions per series and then concatting - expected = zip_frames(f_sqrt, f_abs) - expected.columns = pd.MultiIndex.from_product( - [self.frame.columns, ['sqrt', 
'absolute']]) - result = self.frame.apply([np.sqrt, np.abs]) + result = self.frame.apply([np.abs, np.sqrt], axis=axis) + expected = zip_frames([f_abs, f_sqrt], axis=other_axis) + if axis == 0: + expected.columns = pd.MultiIndex.from_product( + [self.frame.columns, ['absolute', 'sqrt']]) + else: + expected.index = pd.MultiIndex.from_product( + [self.frame.index, ['absolute', 'sqrt']]) assert_frame_equal(result, expected) - result = self.frame.transform(['sqrt', np.abs]) + result = self.frame.transform([np.abs, 'sqrt'], axis=axis) assert_frame_equal(result, expected) def test_transform_and_agg_err(self, axis): @@ -985,46 +1002,51 @@ def test_agg_dict_nested_renaming_depr(self): def test_agg_reduce(self, axis): other_axis = abs(axis - 1) - name1, name2 = self.frame.axes[other_axis].unique()[:2] + name1, name2 = self.frame.axes[other_axis].unique()[:2].sort_values() # all reducers - expected = zip_frames(self.frame.mean(axis=axis).to_frame(), - self.frame.max(axis=axis).to_frame(), - self.frame.sum(axis=axis).to_frame()).T - expected.index = ['mean', 'max', 'sum'] + expected = pd.concat([self.frame.mean(axis=axis), + self.frame.max(axis=axis), + self.frame.sum(axis=axis), + ], axis=1) + expected.columns = ['mean', 'max', 'sum'] + expected = expected.T if axis == 0 else expected + result = self.frame.agg(['mean', 'max', 'sum'], axis=axis) assert_frame_equal(result, expected) # dict input with scalars - func = {name1: 'mean', name2: 'sum'} + func = OrderedDict([(name1, 'mean'), (name2, 'sum')]) result = self.frame.agg(func, axis=axis) expected = Series([self.frame.loc(other_axis)[name1].mean(), self.frame.loc(other_axis)[name2].sum()], index=[name1, name2]) - assert_series_equal(result.reindex_like(expected), expected) + assert_series_equal(result, expected) # dict input with lists - func = {name1: ['mean'], name2: ['sum']} + func = OrderedDict([(name1, ['mean']), (name2, ['sum'])]) result = self.frame.agg(func, axis=axis) expected = DataFrame({ name1: 
Series([self.frame.loc(other_axis)[name1].mean()], index=['mean']), name2: Series([self.frame.loc(other_axis)[name2].sum()], index=['sum'])}) - assert_frame_equal(result.reindex_like(expected), expected) + expected = expected.T if axis == 1 else expected + assert_frame_equal(result, expected) # dict input with lists with multiple - func = {name1: ['mean', 'sum'], - name2: ['sum', 'max']} + func = OrderedDict([(name1, ['mean', 'sum']), (name2, ['sum', 'max'])]) result = self.frame.agg(func, axis=axis) - expected = DataFrame({ - name1: Series([self.frame.loc(other_axis)[name1].mean(), + expected = DataFrame(OrderedDict([ + (name1, Series([self.frame.loc(other_axis)[name1].mean(), self.frame.loc(other_axis)[name1].sum()], - index=['mean', 'sum']), - name2: Series([self.frame.loc(other_axis)[name2].sum(), + index=['mean', 'sum'])), + (name2, Series([self.frame.loc(other_axis)[name2].sum(), self.frame.loc(other_axis)[name2].max()], - index=['sum', 'max'])}) - assert_frame_equal(result.reindex_like(expected), expected) + index=['sum', 'max'])), + ])) + expected = expected.T if axis == 1 else expected + assert_frame_equal(result, expected) def test_nuiscance_columns(self): From b6382d4487534b033c5cf4fddd9748cfc69c451e Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 20 Jul 2018 18:45:04 +0100 Subject: [PATCH 5/7] 'index' and 'columns' added to fixture and related changes. 
--- doc/source/whatsnew/v0.24.0.txt | 8 +++--- pandas/conftest.py | 6 ++--- pandas/core/apply.py | 4 ++- pandas/core/frame.py | 4 +-- pandas/tests/frame/test_apply.py | 14 +++++----- .../generic/test_label_or_level_utils.py | 26 +++++++++---------- 6 files changed, 31 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 38f1ac4193947..04c2e253cfa5d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -475,11 +475,9 @@ Numeric - Bug in :class:`Series` ``__rmatmul__`` doesn't support matrix vector multiplication (:issue:`21530`) - Bug in :func:`factorize` fails with read-only array (:issue:`12813`) - Fixed bug in :func:`unique` handled signed zeros inconsistently: for some inputs 0.0 and -0.0 were treated as equal and for some inputs as different. Now they are treated as equal for all inputs (:issue:`21866`) -- Bug in :meth:`DataFrame.agg`, :meth:`DataFrame.transform` and :meth:`DataFrame.apply` when ``axis=1``. - Using ``apply`` with a list of functions and axis=1 (e.g. ``df.apply(['abs'], axis=1)``) - previously gave a TypeError. This fixes that issue. - As ``agg`` and ``transform`` in some cases delegate to ``apply``, this also - fixed this issue for them (:issue:`16679`). +- Bug in :meth:`DataFrame.agg`, :meth:`DataFrame.transform` and :meth:`DataFrame.apply` where, + when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``), + a ``TypeError`` was wrongly raised. For all three methods such calculations are now done correctly. (:issue:`16679`).
- Strings diff --git a/pandas/conftest.py b/pandas/conftest.py index 98faeee6b09c3..f81c1c124ca74 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -60,8 +60,8 @@ def spmatrix(request): return getattr(sparse, request.param + '_matrix') -@pytest.fixture(params=[0, 1], - ids=lambda x: "axis {}".format(x)) +@pytest.fixture(params=[0, 1, 'index', 'columns'], + ids=lambda x: "axis {!r}".format(x)) def axis(request): """ Fixture for returning the axis numbers of a dataframe. @@ -69,7 +69,7 @@ def axis(request): return request.param -@pytest.fixture(params=[0], ids=lambda x: "axis {}".format(x)) +@pytest.fixture(params=[0, 'index'], ids=lambda x: "axis {!r}".format(x)) def axis_series(request): """ Fixture for returning the axis numbers of a series. diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 77901bda75ce4..989becbf133ca 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -5,6 +5,8 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import ( is_extension_type, + is_dict_like, + is_list_like, is_sequence) from pandas.util._decorators import cache_readonly @@ -106,7 +108,7 @@ def get_result(self): """ compute the results """ # dispatch to agg - if isinstance(self.f, (list, dict)): + if is_list_like(self.f) or is_dict_like(self.f): return self.obj.aggregate(self.f, axis=self.axis, *self.args, **self.kwds) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 401982f07d66c..8f6ebef0bf6e4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6080,7 +6080,7 @@ def aggregate(self, func, axis=0, *args, **kwargs): return result def _aggregate(self, arg, axis=0, *args, **kwargs): - if axis == 1: + if axis in {1, 'columns'}: result, how = (super(DataFrame, self.T) ._aggregate(arg, *args, **kwargs)) result = result.T if result is not None else result @@ -6090,7 +6090,7 @@ def _aggregate(self, arg, axis=0, *args, **kwargs): agg = aggregate def transform(self, func, axis=0, *args, **kwargs): - 
if axis == 1: + if axis in {1, 'columns'}: return super(DataFrame, self.T).transform(func, *args, **kwargs).T return super(DataFrame, self).transform(func, *args, **kwargs) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index bb7c99076c7ce..1b09aa18dcaff 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -869,7 +869,7 @@ def zip_frames(frames, axis=1): class TestDataFrameAggregate(TestData): def test_agg_transform(self, axis): - other_axis = abs(axis - 1) + other_axis = 1 if axis in {0, 'index'} else 0 with np.errstate(all='ignore'): @@ -890,7 +890,7 @@ def test_agg_transform(self, axis): # list-like result = self.frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() - if axis == 0: + if axis in {0, 'index'}: expected.columns = pd.MultiIndex.from_product( [self.frame.columns, ['sqrt']]) else: @@ -906,7 +906,7 @@ def test_agg_transform(self, axis): # functions per series and then concatting result = self.frame.apply([np.abs, np.sqrt], axis=axis) expected = zip_frames([f_abs, f_sqrt], axis=other_axis) - if axis == 0: + if axis in {0, 'index'}: expected.columns = pd.MultiIndex.from_product( [self.frame.columns, ['absolute', 'sqrt']]) else: @@ -1001,7 +1001,7 @@ def test_agg_dict_nested_renaming_depr(self): 'B': {'bar': 'max'}}) def test_agg_reduce(self, axis): - other_axis = abs(axis - 1) + other_axis = 1 if axis in {0, 'index'} else 0 name1, name2 = self.frame.axes[other_axis].unique()[:2].sort_values() # all reducers @@ -1010,7 +1010,7 @@ def test_agg_reduce(self, axis): self.frame.sum(axis=axis), ], axis=1) expected.columns = ['mean', 'max', 'sum'] - expected = expected.T if axis == 0 else expected + expected = expected.T if axis in {0, 'index'} else expected result = self.frame.agg(['mean', 'max', 'sum'], axis=axis) assert_frame_equal(result, expected) @@ -1031,7 +1031,7 @@ def test_agg_reduce(self, axis): index=['mean']), name2: Series([self.frame.loc(other_axis)[name2].sum()], 
index=['sum'])}) - expected = expected.T if axis == 1 else expected + expected = expected.T if axis in {1, 'columns'} else expected assert_frame_equal(result, expected) # dict input with lists with multiple @@ -1045,7 +1045,7 @@ def test_agg_reduce(self, axis): self.frame.loc(other_axis)[name2].max()], index=['sum', 'max'])), ])) - expected = expected.T if axis == 1 else expected + expected = expected.T if axis in {1, 'columns'} else expected assert_frame_equal(result, expected) def test_nuiscance_columns(self): diff --git a/pandas/tests/generic/test_label_or_level_utils.py b/pandas/tests/generic/test_label_or_level_utils.py index ca1c43c8d0f7e..8e4d28fc796df 100644 --- a/pandas/tests/generic/test_label_or_level_utils.py +++ b/pandas/tests/generic/test_label_or_level_utils.py @@ -82,7 +82,7 @@ def test_is_level_or_label_reference_df_simple(df_levels, axis): expected_labels, expected_levels = get_labels_levels(df_levels) # Transpose frame if axis == 1 - if axis == 1: + if axis in {1, 'columns'}: df_levels = df_levels.T # Perform checks @@ -93,7 +93,7 @@ def test_is_level_or_label_reference_df_simple(df_levels, axis): def test_is_level_reference_df_ambig(df_ambig, axis): # Transpose frame if axis == 1 - if axis == 1: + if axis in {1, 'columns'}: df_ambig = df_ambig.T # df has both an on-axis level and off-axis label named L1 @@ -166,7 +166,7 @@ def test_is_label_or_level_reference_panel_error(panel): def test_check_label_or_level_ambiguity_df(df_ambig, axis): # Transpose frame if axis == 1 - if axis == 1: + if axis in {1, 'columns'}: df_ambig = df_ambig.T # df_ambig has both an on-axis level and off-axis label named L1 @@ -176,7 +176,7 @@ def test_check_label_or_level_ambiguity_df(df_ambig, axis): assert df_ambig._check_label_or_level_ambiguity('L1', axis=axis) warning_msg = w[0].message.args[0] - if axis == 0: + if axis in {0, 'index'}: assert warning_msg.startswith("'L1' is both an index level " "and a column label") else: @@ -236,7 +236,7 @@ def 
test_check_label_or_level_ambiguity_panel_error(panel): # =============================== def assert_label_values(frame, labels, axis): for label in labels: - if axis == 0: + if axis in {0, 'index'}: expected = frame[label]._values else: expected = frame.loc[label]._values @@ -248,7 +248,7 @@ def assert_label_values(frame, labels, axis): def assert_level_values(frame, levels, axis): for level in levels: - if axis == 0: + if axis in {0, 'index'}: expected = frame.index.get_level_values(level=level)._values else: expected = (frame.columns @@ -267,7 +267,7 @@ def test_get_label_or_level_values_df_simple(df_levels, axis): expected_labels, expected_levels = get_labels_levels(df_levels) # Transpose frame if axis == 1 - if axis == 1: + if axis in {1, 'columns'}: df_levels = df_levels.T # Perform checks @@ -278,7 +278,7 @@ def test_get_label_or_level_values_df_simple(df_levels, axis): def test_get_label_or_level_values_df_ambig(df_ambig, axis): # Transpose frame if axis == 1 - if axis == 1: + if axis in {1, 'columns'}: df_ambig = df_ambig.T # df has both an on-axis level and off-axis label named L1 @@ -298,7 +298,7 @@ def test_get_label_or_level_values_df_ambig(df_ambig, axis): def test_get_label_or_level_values_df_duplabels(df_duplabels, axis): # Transpose frame if axis == 1 - if axis == 1: + if axis in {1, 'columns'}: df_duplabels = df_duplabels.T # df has unambiguous level 'L1' @@ -308,7 +308,7 @@ def test_get_label_or_level_values_df_duplabels(df_duplabels, axis): assert_label_values(df_duplabels, ['L3'], axis=axis) # df has duplicate labels 'L2' - if axis == 0: + if axis in {0, 'index'}: expected_msg = "The column label 'L2' is not unique" else: expected_msg = "The index label 'L2' is not unique" @@ -355,7 +355,7 @@ def assert_labels_dropped(frame, labels, axis): for label in labels: df_dropped = frame._drop_labels_or_levels(label, axis=axis) - if axis == 0: + if axis in {0, 'index'}: assert label in frame.columns assert label not in df_dropped.columns else: @@ -367,7 
+367,7 @@ def assert_levels_dropped(frame, levels, axis): for level in levels: df_dropped = frame._drop_labels_or_levels(level, axis=axis) - if axis == 0: + if axis in {0, 'index'}: assert level in frame.index.names assert level not in df_dropped.index.names else: @@ -383,7 +383,7 @@ def test_drop_labels_or_levels_df(df_levels, axis): expected_labels, expected_levels = get_labels_levels(df_levels) # Transpose frame if axis == 1 - if axis == 1: + if axis in {1, 'columns'}: df_levels = df_levels.T # Perform checks From 5ad024c25d89b32ea368afb45e026c7130cbba04 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 22 Jul 2018 14:35:52 +0100 Subject: [PATCH 6/7] add conftest cython_table_items + a few corrections --- pandas/conftest.py | 18 +++++++++++++++--- pandas/core/frame.py | 6 ++++-- pandas/tests/frame/test_apply.py | 12 +++--------- pandas/tests/series/test_apply.py | 9 ++------- 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index f81c1c124ca74..f07bedd57c2fe 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -6,7 +6,7 @@ import pandas import numpy as np import pandas as pd -from pandas.compat import PY3 +from pandas.compat import PY3, PY36 import pandas.util._test_decorators as td @@ -64,7 +64,7 @@ def spmatrix(request): ids=lambda x: "axis {!r}".format(x)) def axis(request): """ - Fixture for returning the axis numbers of a dataframe. + Fixture for returning the axis numbers of a DataFrame. """ return request.param @@ -72,7 +72,7 @@ def axis(request): @pytest.fixture(params=[0, 'index'], ids=lambda x: "axis {!r}".format(x)) def axis_series(request): """ - Fixture for returning the axis numbers of a series. + Fixture for returning the axis numbers of a Series. 
""" return request.param @@ -120,6 +120,18 @@ def all_arithmetic_operators(request): return request.param +_cython_table = list(pd.core.base.SelectionMixin._cython_table.items()) +if not PY36: + # dicts have random order in Python<3.6, which xdist doesn't like + _cython_table = sorted(((key, value) for key, value in _cython_table), + key=lambda x: x[0].__class__.__name__) + + +@pytest.fixture(params=_cython_table) +def cython_table_items(request): + return request.param + + @pytest.fixture(params=['__eq__', '__ne__', '__le__', '__lt__', '__ge__', '__gt__']) def all_compare_operators(request): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8f6ebef0bf6e4..99b72604c605c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6080,7 +6080,8 @@ def aggregate(self, func, axis=0, *args, **kwargs): return result def _aggregate(self, arg, axis=0, *args, **kwargs): - if axis in {1, 'columns'}: + axis = self._get_axis_number(axis) + if axis == 1: result, how = (super(DataFrame, self.T) ._aggregate(arg, *args, **kwargs)) result = result.T if result is not None else result @@ -6090,7 +6091,8 @@ def _aggregate(self, arg, axis=0, *args, **kwargs): agg = aggregate def transform(self, func, axis=0, *args, **kwargs): - if axis in {1, 'columns'}: + axis = self._get_axis_number(axis) + if axis == 1: return super(DataFrame, self.T).transform(func, *args, **kwargs).T return super(DataFrame, self).transform(func, *args, **kwargs) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 1b09aa18dcaff..d758c6c0df86b 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -40,18 +40,12 @@ def _get_cython_table_params(frame, func_names_and_expected): results : list List of three items (DataFrame, function, expected result) """ - table = pd.core.base.SelectionMixin._cython_table - if compat.PY36: - table = list(table.items()) - else: # dicts have random order in Python<3.6, which xdist doesn't like - table 
= sorted(((key, value) for key, value in table.items()), - key=lambda x: x[0].__class__.__name__) + from pandas.conftest import _cython_table results = [] for func_name, expected in func_names_and_expected: results.append((frame, func_name, expected)) - results += [ - (frame, func, expected) for func, name in table - if name == func_name] + results += [(frame, func, expected) for func, name in _cython_table + if name == func_name] return results diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index e85a8732092c9..9b4db0ec1a2d0 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -35,17 +35,12 @@ def _get_cython_table_params(series, func_names_and_expected): results : list List of three items (Series, function, expected result) """ - table = pd.core.base.SelectionMixin._cython_table - if compat.PY36: - table = list(table.items()) - else: # dicts have random order in Python<3.6, which xdist doesn't like - table = sorted(((key, value) for key, value in table.items()), - key=lambda x: x[0].__class__.__name__) + from pandas.conftest import _cython_table results = [] for func_name, expected in func_names_and_expected: results.append((series, func_name, expected)) results += [ - (series, func, expected) for func, name in table + (series, func, expected) for func, name in _cython_table if name == func_name] return results From 39ced290b257435e97c74dc95aff2391fdf7fd02 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 25 Jul 2018 14:46:11 +0100 Subject: [PATCH 7/7] clarified according to comments --- pandas/conftest.py | 38 ++++++++++++++++++++++++++----- pandas/core/frame.py | 4 +++- pandas/tests/frame/test_apply.py | 27 +--------------------- pandas/tests/series/test_apply.py | 31 +++---------------------- 4 files changed, 39 insertions(+), 61 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index f07bedd57c2fe..e878b32fcad7b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -6,7 
+6,7 @@ import pandas import numpy as np import pandas as pd -from pandas.compat import PY3, PY36 +from pandas.compat import PY3 import pandas.util._test_decorators as td @@ -69,6 +69,9 @@ def axis(request): return request.param +axis_frame = axis + + @pytest.fixture(params=[0, 'index'], ids=lambda x: "axis {!r}".format(x)) def axis_series(request): """ @@ -120,11 +123,10 @@ def all_arithmetic_operators(request): return request.param -_cython_table = list(pd.core.base.SelectionMixin._cython_table.items()) -if not PY36: - # dicts have random order in Python<3.6, which xdist doesn't like - _cython_table = sorted(((key, value) for key, value in _cython_table), - key=lambda x: x[0].__class__.__name__) +# use sorted as dicts in py<3.6 have random order, which xdist doesn't like +_cython_table = sorted(((key, value) for key, value in + pd.core.base.SelectionMixin._cython_table.items()), + key=lambda x: x[0].__class__.__name__) @pytest.fixture(params=_cython_table) @@ -132,6 +134,30 @@ def cython_table_items(request): return request.param +def _get_cython_table_params(ndframe, func_names_and_expected): + """combine frame, functions from SelectionMixin._cython_table + keys and expected result. + + Parameters + ---------- + ndframe : DataFrame or Series + func_names_and_expected : Sequence of two items + The first item is a name of a NDFrame method ('sum', 'prod') etc. 
+ The second item is the expected return value + + Returns + ------- + results : list + List of three items (DataFrame, function, expected result) + """ + results = [] + for func_name, expected in func_names_and_expected: + results.append((ndframe, func_name, expected)) + results += [(ndframe, func, expected) for func, name in _cython_table + if name == func_name] + return results + + @pytest.fixture(params=['__eq__', '__ne__', '__le__', '__lt__', '__ge__', '__gt__']) def all_compare_operators(request): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 99b72604c605c..a66b9a7e92e85 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6080,8 +6080,9 @@ def aggregate(self, func, axis=0, *args, **kwargs): return result def _aggregate(self, arg, axis=0, *args, **kwargs): - axis = self._get_axis_number(axis) if axis == 1: + # NDFrame.aggregate returns a tuple, and we need to transpose + # only result result, how = (super(DataFrame, self.T) ._aggregate(arg, *args, **kwargs)) result = result.T if result is not None else result @@ -6090,6 +6091,7 @@ def _aggregate(self, arg, axis=0, *args, **kwargs): agg = aggregate + @Appender(_shared_docs['transform'] % _shared_doc_kwargs) def transform(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) if axis == 1: diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index d758c6c0df86b..e038588b76ffd 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -20,35 +20,10 @@ from pandas.util.testing import (assert_series_equal, assert_frame_equal) import pandas.util.testing as tm +from pandas.conftest import _get_cython_table_params from pandas.tests.frame.common import TestData -def _get_cython_table_params(frame, func_names_and_expected): - """combine frame, functions from SelectionMixin._cython_table - keys and expected result. 
- - Parameters - ---------- - frame : DataFrame - A symmetrical DataFrame - func_names_and_expected : Sequence of two items - The first item is a name of a NDFrame method ('sum', 'prod') etc. - The second item is the expected return value - - Returns - ------- - results : list - List of three items (DataFrame, function, expected result) - """ - from pandas.conftest import _cython_table - results = [] - for func_name, expected in func_names_and_expected: - results.append((frame, func_name, expected)) - results += [(frame, func, expected) for func, name in _cython_table - if name == func_name] - return results - - class TestDataFrameApply(TestData): def test_apply(self): diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 9b4db0ec1a2d0..b717d75d835d0 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -12,39 +12,14 @@ from pandas import (Index, Series, DataFrame, isna) from pandas.compat import lrange from pandas import compat -from pandas.util.testing import assert_series_equal, assert_frame_equal +from pandas.util.testing import (assert_series_equal, + assert_frame_equal) import pandas.util.testing as tm +from pandas.conftest import _get_cython_table_params from .common import TestData -def _get_cython_table_params(series, func_names_and_expected): - """combine series, functions from SelectionMixin._cython_table - keys and expected result. - - Parameters - ---------- - series : Series - A Series - func_names_and_expected : Sequence of two items - The first item is a name of a NDFrame method ('sum', 'prod') etc. 
- The second item is the expected return value - - Returns - ------- - results : list - List of three items (Series, function, expected result) - """ - from pandas.conftest import _cython_table - results = [] - for func_name, expected in func_names_and_expected: - results.append((series, func_name, expected)) - results += [ - (series, func, expected) for func, name in _cython_table - if name == func_name] - return results - - class TestSeriesApply(TestData): def test_apply(self):