Skip to content

Commit 212323f

Browse files
authored
BUG: DataFrame.agg and apply with 'size' returns a scalar (#39935)
1 parent dec7d21 commit 212323f

File tree

3 files changed

+30
-10
lines changed

3 files changed

+30
-10
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ Numeric
323323
- Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
324324
- Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising ``IndexError`` (:issue:`38932`)
325325
- Bug in :func:`select_dtypes` different behavior between Windows and Linux with ``include="int"`` (:issue:`36569`)
326+
- Bug in :meth:`DataFrame.apply` and :meth:`DataFrame.agg` where passing ``func="size"`` would operate on the entire ``DataFrame`` instead of on its rows or columns (:issue:`39934`)
326327
-
327328

328329
Conversion

pandas/core/apply.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,10 @@ def f(x):
159159
def index(self) -> Index:
160160
return self.obj.index
161161

162+
@property
163+
def agg_axis(self) -> Index:
164+
return self.obj._get_agg_axis(self.axis)
165+
162166
@abc.abstractmethod
163167
def apply(self) -> FrameOrSeriesUnion:
164168
pass
@@ -541,17 +545,26 @@ def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]:
541545
f = self.f
542546
if not isinstance(f, str):
543547
return None
548+
549+
obj = self.obj
550+
551+
# TODO: GH 39993 - Avoid special-casing by replacing with lambda
552+
if f == "size" and isinstance(obj, ABCDataFrame):
553+
# Special-cased because DataFrame.size returns a single scalar
554+
value = obj.shape[self.axis]
555+
return obj._constructor_sliced(value, index=self.agg_axis, name="size")
556+
544557
# Support for `frame.transform('method')`
545558
# Some methods (shift, etc.) require the axis argument, others
546559
# don't, so inspect and insert if necessary.
547-
func = getattr(self.obj, f, None)
560+
func = getattr(obj, f, None)
548561
if callable(func):
549562
sig = inspect.getfullargspec(func)
550563
if "axis" in sig.args:
551564
self.kwargs["axis"] = self.axis
552565
elif self.axis != 0:
553566
raise ValueError(f"Operation {f} does not support axis=1")
554-
return self.obj._try_aggregate_string_function(f, *self.args, **self.kwargs)
567+
return obj._try_aggregate_string_function(f, *self.args, **self.kwargs)
555568

556569
def maybe_apply_multiple(self) -> Optional[FrameOrSeriesUnion]:
557570
"""
@@ -613,10 +626,6 @@ def values(self):
613626
def dtypes(self) -> Series:
614627
return self.obj.dtypes
615628

616-
@property
617-
def agg_axis(self) -> Index:
618-
return self.obj._get_agg_axis(self.axis)
619-
620629
def apply(self) -> FrameOrSeriesUnion:
621630
""" compute the results """
622631
# dispatch to agg

pandas/tests/apply/test_frame_apply.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -1415,11 +1415,21 @@ def test_non_callable_aggregates(how):
14151415

14161416
tm.assert_series_equal(result, expected)
14171417

1418-
# Just a string attribute arg same as calling df.arg
1419-
result = getattr(df, how)("size")
1420-
expected = df.size
14211418

1422-
assert result == expected
1419+
@pytest.mark.parametrize("how", ["agg", "apply"])
1420+
def test_size_as_str(how, axis):
1421+
# GH 39934
1422+
df = DataFrame(
1423+
{"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]}
1424+
)
1425+
# Just a string attribute arg same as calling df.arg
1426+
# on the columns
1427+
result = getattr(df, how)("size", axis=axis)
1428+
if axis == 0 or axis == "index":
1429+
expected = Series(df.shape[0], index=df.columns, name="size")
1430+
else:
1431+
expected = Series(df.shape[1], index=df.index, name="size")
1432+
tm.assert_series_equal(result, expected)
14231433

14241434

14251435
def test_agg_listlike_result():

0 commit comments

Comments
 (0)