Skip to content

Commit d74bd31

Browse files
wolever authored and jreback committed
Add is_reduction argument to DataFrame.apply
Use 'reduce' argument instead of adding 'is_reduction' argument. Fix apply on empty DataFrame returns DataFrame. Add changelog notes and examples.
1 parent 9ab2385 commit d74bd31

File tree

4 files changed

+79
-18
lines changed

4 files changed

+79
-18
lines changed

doc/source/release.rst

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ API Changes
6767
- Add ``-NaN`` and ``-nan`` to the default set of NA values
6868
(:issue:`5952`). See :ref:`NA Values <io.na_values>`.
6969
- ``NDFrame`` now has an ``equals`` method. (:issue:`5283`)
70+
- ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a
71+
``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is
72+
empty (:issue:`6007`).
7073

7174
Experimental Features
7275
~~~~~~~~~~~~~~~~~~~~~

doc/source/v0.13.1.txt

+30
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ API changes
4242

4343
- Add ``-NaN`` and ``-nan`` to the default set of NA values (:issue:`5952`).
4444
See :ref:`NA Values <io.na_values>`.
45+
4546
- Added the ``NDFrame.equals()`` method to compare if two NDFrames are
4647
equal, i.e. have equal axes, dtypes, and values. Added the
4748
``array_equivalent`` function to compare if two ndarrays are
@@ -58,6 +59,35 @@ API changes
5859
com.array_equivalent(np.array([0, np.nan]), np.array([0, np.nan]))
5960
np.array_equal(np.array([0, np.nan]), np.array([0, np.nan]))
6061

62+
- ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a
63+
``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is
64+
empty (:issue:`6007`).
65+
66+
Previously, calling ``DataFrame.apply`` on an empty ``DataFrame`` would return
67+
either a ``DataFrame`` if there were no columns, or the function being
68+
applied would be called with an empty ``Series`` to guess whether a
69+
``Series`` or ``DataFrame`` should be returned:
70+
71+
.. ipython:: python
72+
73+
def applied_func(col):
74+
print "Apply function being called with:", col
75+
return col.sum()
76+
77+
empty = DataFrame(columns=['a', 'b'])
78+
empty.apply(applied_func)
79+
80+
Now, when ``apply`` is called on an empty ``DataFrame``: if the ``reduce``
81+
argument is ``True`` a ``Series`` will be returned, if it is ``False`` a
82+
``DataFrame`` will be returned, and if it is ``None`` (the default) the
83+
function being applied will be called with an empty series to try and guess
84+
the return type.
85+
86+
.. ipython:: python
87+
88+
empty.apply(applied_func, reduce=True)
89+
empty.apply(applied_func, reduce=False)
90+
6191
Prior Version Deprecations/Changes
6292
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6393

pandas/core/frame.py

+30-18
Original file line numberDiff line numberDiff line change
@@ -3234,14 +3234,15 @@ def diff(self, periods=1):
32343234
#----------------------------------------------------------------------
32353235
# Function application
32363236

3237-
def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True,
3237+
def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None,
32383238
args=(), **kwds):
32393239
"""
32403240
Applies function along input axis of DataFrame.
32413241
32423242
Objects passed to functions are Series objects having index
32433243
either the DataFrame's index (axis=0) or the columns (axis=1).
3244-
Return type depends on whether passed function aggregates
3244+
Return type depends on whether passed function aggregates, or the
3245+
reduce argument if the DataFrame is empty.
32453246
32463247
Parameters
32473248
----------
@@ -3253,8 +3254,14 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True,
32533254
broadcast : boolean, default False
32543255
For aggregation functions, return object of same size with values
32553256
propagated
3256-
reduce : boolean, default True
3257-
Try to apply reduction procedures
3257+
reduce : boolean or None, default None
3258+
Try to apply reduction procedures. If the DataFrame is empty,
3259+
apply will use reduce to determine whether the result should be a
3260+
Series or a DataFrame. If reduce is None (the default), apply's
3261+
return value will be guessed by calling func on an empty Series (note:
3262+
while guessing, exceptions raised by func will be ignored). If
3263+
reduce is True a Series will always be returned, and if False a
3264+
DataFrame will always be returned.
32583265
raw : boolean, default False
32593266
If False, convert each row or column into a Series. If raw=True the
32603267
passed function will receive ndarray objects instead. If you are
@@ -3279,41 +3286,46 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True,
32793286
-------
32803287
applied : Series or DataFrame
32813288
"""
3282-
if len(self.columns) == 0 and len(self.index) == 0:
3283-
return self
3284-
32853289
axis = self._get_axis_number(axis)
32863290
if kwds or args and not isinstance(func, np.ufunc):
32873291
f = lambda x: func(x, *args, **kwds)
32883292
else:
32893293
f = func
32903294

3295+
if len(self.columns) == 0 and len(self.index) == 0:
3296+
return self._apply_empty_result(func, axis, reduce)
3297+
32913298
if isinstance(f, np.ufunc):
32923299
results = f(self.values)
32933300
return self._constructor(data=results, index=self.index,
32943301
columns=self.columns, copy=False)
32953302
else:
32963303
if not broadcast:
32973304
if not all(self.shape):
3298-
# How to determine this better?
3299-
is_reduction = False
3300-
try:
3301-
is_reduction = not isinstance(f(_EMPTY_SERIES), Series)
3302-
except Exception:
3303-
pass
3304-
3305-
if is_reduction:
3306-
return Series(NA, index=self._get_agg_axis(axis))
3307-
else:
3308-
return self.copy()
3305+
return self._apply_empty_result(func, axis, reduce)
33093306

33103307
if raw and not self._is_mixed_type:
33113308
return self._apply_raw(f, axis)
33123309
else:
3310+
if reduce is None:
3311+
reduce = True
33133312
return self._apply_standard(f, axis, reduce=reduce)
33143313
else:
33153314
return self._apply_broadcast(f, axis)
33163315

3316+
def _apply_empty_result(self, func, axis, reduce):
3317+
if reduce is None:
3318+
reduce = False
3319+
try:
3320+
reduce = not isinstance(func(_EMPTY_SERIES), Series)
3321+
except Exception:
3322+
pass
3323+
3324+
if reduce:
3325+
return Series(NA, index=self._get_agg_axis(axis))
3326+
else:
3327+
return self.copy()
3328+
33173329
def _apply_raw(self, func, axis):
33183330
try:
33193331
result = lib.reduce(self.values, func, axis=axis)

pandas/tests/test_frame.py

+16
Original file line numberDiff line numberDiff line change
@@ -8992,6 +8992,22 @@ def test_apply_empty(self):
89928992
rs = xp.apply(lambda x: x['a'], axis=1)
89938993
assert_frame_equal(xp, rs)
89948994

8995+
# reduce with an empty DataFrame
8996+
x = []
8997+
result = self.empty.apply(x.append, axis=1, reduce=False)
8998+
assert_frame_equal(result, self.empty)
8999+
result = self.empty.apply(x.append, axis=1, reduce=True)
9000+
assert_series_equal(result, Series([]))
9001+
9002+
empty_with_cols = DataFrame(columns=['a', 'b', 'c'])
9003+
result = empty_with_cols.apply(x.append, axis=1, reduce=False)
9004+
assert_frame_equal(result, empty_with_cols)
9005+
result = empty_with_cols.apply(x.append, axis=1, reduce=True)
9006+
assert_series_equal(result, Series([]))
9007+
9008+
# Ensure that x.append hasn't been called
9009+
self.assertEqual(x, [])
9010+
89959011
def test_apply_standard_nonunique(self):
89969012
df = DataFrame(
89979013
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c'])

0 commit comments

Comments
 (0)