diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c65502898195a..d1bf38588710b 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -323,6 +323,7 @@ Numeric - Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`) - Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising ``IndexError`` (:issue:`38932`) - Bug in :func:`select_dtypes` different behavior between Windows and Linux with ``include="int"`` (:issue:`36569`) +- Bug in :meth:`DataFrame.apply` and :meth:`DataFrame.agg` when passed argument ``func="size"`` would operate on the entire ``DataFrame`` instead of rows or columns (:issue:`39934`) - Conversion diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 46b1e5b20ce3a..c7fa298b06a2f 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -159,6 +159,10 @@ def f(x): def index(self) -> Index: return self.obj.index + @property + def agg_axis(self) -> Index: + return self.obj._get_agg_axis(self.axis) + @abc.abstractmethod def apply(self) -> FrameOrSeriesUnion: pass @@ -541,17 +545,26 @@ def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]: f = self.f if not isinstance(f, str): return None + + obj = self.obj + + # TODO: GH 39993 - Avoid special-casing by replacing with lambda + if f == "size" and isinstance(obj, ABCDataFrame): + # Special-cased because DataFrame.size returns a single scalar + value = obj.shape[self.axis] + return obj._constructor_sliced(value, index=self.agg_axis, name="size") + # Support for `frame.transform('method')` # Some methods (shift, etc.) require the axis argument, others # don't, so inspect and insert if necessary. - func = getattr(self.obj, f, None) + func = getattr(obj, f, None) if callable(func): sig = inspect.getfullargspec(func) if "axis" in sig.args: self.kwargs["axis"] = self.axis elif self.axis != 0: raise ValueError(f"Operation {f} does not support axis=1") - return self.obj._try_aggregate_string_function(f, *self.args, **self.kwargs) + return obj._try_aggregate_string_function(f, *self.args, **self.kwargs) def maybe_apply_multiple(self) -> Optional[FrameOrSeriesUnion]: """ @@ -613,10 +626,6 @@ def values(self): def dtypes(self) -> Series: return self.obj.dtypes - @property - def agg_axis(self) -> Index: - return self.obj._get_agg_axis(self.axis) - def apply(self) -> FrameOrSeriesUnion: """ compute the results """ # dispatch to agg diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 083c34ce4b63f..c5d0b215ff4d8 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1415,11 +1415,21 @@ def test_non_callable_aggregates(how): tm.assert_series_equal(result, expected) - # Just a string attribute arg same as calling df.arg - result = getattr(df, how)("size") - expected = df.size - assert result == expected +@pytest.mark.parametrize("how", ["agg", "apply"]) +def test_size_as_str(how, axis): + # GH 39934 + df = DataFrame( + {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]} + ) + # Just a string attribute arg same as calling df.arg + # on the columns + result = getattr(df, how)("size", axis=axis) + if axis == 0 or axis == "index": + expected = Series(df.shape[0], index=df.columns, name="size") + else: + expected = Series(df.shape[1], index=df.index, name="size") + tm.assert_series_equal(result, expected) def test_agg_listlike_result():