From 8e1c0aaf0c9e9b79b08712b1ec7b96d46f1b2001 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 20 Feb 2021 09:17:37 -0500 Subject: [PATCH 1/3] BUG: DataFrame.agg and apply with 'size' returns a scalar --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/apply.py | 20 ++++++++++++++------ pandas/tests/apply/test_frame_apply.py | 18 ++++++++++++++---- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index bf67ff6525005..0f226cef79818 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -320,6 +320,7 @@ Numeric - Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`) - Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising ``IndexError`` (:issue:`38932`) - Bug in :func:`select_dtypes` different behavior between Windows and Linux with ``include="int"`` (:issue:`36569`) +- Bug in :meth:`DataFrame.apply` and :meth:`DataFrame.agg` when passed argument ``func="size"`` would operate on the entire ``DataFrame`` instead of rows or columns (:issue:`39934`) - Conversion diff --git a/pandas/core/apply.py b/pandas/core/apply.py index b41c432dff172..3b37f7e65e93d 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -157,6 +157,10 @@ def f(x): def index(self) -> Index: return self.obj.index + @property + def agg_axis(self) -> Index: + return self.obj._get_agg_axis(self.axis) + @abc.abstractmethod def apply(self) -> FrameOrSeriesUnion: pass @@ -414,17 +418,25 @@ def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]: f = self.f if not isinstance(f, str): return None + + obj = self.obj + + if f == "size" and isinstance(obj, ABCDataFrame): + # Special-cased because DataFrame.size returns a single scalar + value = obj.shape[self.axis] + return obj._constructor_sliced(value, index=self.agg_axis, name="size") + # Support for `frame.transform('method')` # Some methods (shift, etc.) require the axis argument, others # don't, so inspect and insert if necessary. - func = getattr(self.obj, f, None) + func = getattr(obj, f, None) if callable(func): sig = inspect.getfullargspec(func) if "axis" in sig.args: self.kwargs["axis"] = self.axis elif self.axis != 0: raise ValueError(f"Operation {f} does not support axis=1") - return self.obj._try_aggregate_string_function(f, *self.args, **self.kwargs) + return obj._try_aggregate_string_function(f, *self.args, **self.kwargs) def maybe_apply_multiple(self) -> Optional[FrameOrSeriesUnion]: """ @@ -486,10 +498,6 @@ def values(self): def dtypes(self) -> Series: return self.obj.dtypes - @property - def agg_axis(self) -> Index: - return self.obj._get_agg_axis(self.axis) - def apply(self) -> FrameOrSeriesUnion: """ compute the results """ # dispatch to agg diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 3ac9d98874f86..339e7e4a169c0 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1376,16 +1376,26 @@ def test_non_callable_aggregates(self, how): tm.assert_frame_equal(result2, expected, check_like=True) # Just functional string arg is same as calling df.arg() + # on the columns result = getattr(df, how)("count") expected = df.count() tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("how", ["agg", "apply"]) + def test_size_as_str(self, how, axis): + # GH 39934 + df = DataFrame( + {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]} + ) # Just a string attribute arg same as calling df.arg - result = getattr(df, how)("size") - expected = df.size - - assert result == expected + # on the columns + result = getattr(df, how)("size", axis=axis) + if axis == 0 or axis == "index": + expected = Series(df.shape[0], index=df.columns, name="size") + else: + expected = Series(df.shape[1], index=df.index, name="size") + tm.assert_series_equal(result, expected) def test_agg_listlike_result(self): # GH-29587 user defined function returning list-likes From 4bb5d864ecfac1c2ffaec988b690e85542f1d529 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 23 Feb 2021 08:45:59 -0500 Subject: [PATCH 2/3] Added TODO --- pandas/core/apply.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 627ba888e7909..c7fa298b06a2f 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -548,6 +548,7 @@ def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]: obj = self.obj + # TODO: GH 39993 - Avoid special-casing by replacing with lambda if f == "size" and isinstance(obj, ABCDataFrame): # Special-cased because DataFrame.size returns a single scalar value = obj.shape[self.axis] From 042c68bf267e3028f4a6062d7f9e6489fc265561 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 24 Feb 2021 08:01:57 -0500 Subject: [PATCH 3/3] Merge cleanup --- pandas/tests/apply/test_frame_apply.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index d754978373902..c5d0b215ff4d8 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1415,12 +1415,6 @@ def test_non_callable_aggregates(how): tm.assert_series_equal(result, expected) - # Just a string attribute arg same as calling df.arg - result = getattr(df, how)("size") - expected = df.size - - assert result == expected - @pytest.mark.parametrize("how", ["agg", "apply"]) def test_size_as_str(how, axis):