Skip to content

Commit 62af1ee

Browse files
committed
fix nuisance column / string aggs
closes pandas-dev#15015
1 parent 20256e9 commit 62af1ee

File tree

5 files changed

+79
-23
lines changed

5 files changed

+79
-23
lines changed

doc/source/basics.rst

+21-1
Original file line numberDiff line numberDiff line change
@@ -934,7 +934,27 @@ the names of the functions.
934934
935935
tsdf.A.agg({'foo' : ['sum', 'mean'], 'bar': ['min', 'max', lambda x: x.sum()+1]})
936936
937-
.. _basics.custom_describe:
937+
.. _basics.aggregation.mixed_dtypes:
938+
939+
Mixed Dtypes
940+
++++++++++++
941+
942+
When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid
943+
aggregations. This is similar to how groupby ``.agg`` works.
944+
945+
.. ipython:: python
946+
947+
mdf = pd.DataFrame({'A': [1, 2, 3],
948+
'B': [1., 2., 3.],
949+
'C': ['foo', 'bar', 'baz'],
950+
'D': pd.date_range('20130101', periods=3)})
951+
mdf.dtypes
952+
953+
.. ipython:: python
954+
955+
mdf.agg(['min', 'sum'])
956+
957+
.. _basics.aggregation.custom_describe:
938958

939959
Custom describe
940960
+++++++++++++++

doc/source/whatsnew/v0.20.0.txt

+14
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,20 @@ The API also supports a ``.transform()`` function to provide for broadcasting re
7777

7878
df.transform(['abs', lambda x: x-x.min()])
7979

80+
When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid
81+
aggregations. This is similar to how groupby ``.agg`` works. (:issue:`15015`)
82+
83+
.. ipython:: python
84+
85+
df = pd.DataFrame({'A': [1, 2, 3],
86+
'B': [1., 2., 3.],
87+
'C': ['foo', 'bar', 'baz'],
88+
'D': pd.date_range('20130101', periods=3)})
89+
df.dtypes
90+
91+
.. ipython:: python
92+
93+
df.agg(['min', 'sum'])
8094

8195
.. _whatsnew_0200.enhancements.dataio_dtype:
8296

pandas/core/base.py

+3
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,9 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis):
683683
keys.append(col)
684684
except (TypeError, DataError):
685685
pass
686+
except ValueError:
687+
# cannot aggregate
688+
continue
686689
except SpecificationError:
687690
raise
688691

pandas/core/frame.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -4128,6 +4128,7 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None,
41284128
applied : Series or DataFrame
41294129
"""
41304130
axis = self._get_axis_number(axis)
4131+
ignore_failures = kwds.pop('ignore_failures', False)
41314132

41324133
# dispatch to agg
41334134
if axis == 0 and isinstance(func, (list, dict)):
@@ -4164,7 +4165,9 @@ def f(x):
41644165
else:
41654166
if reduce is None:
41664167
reduce = True
4167-
return self._apply_standard(f, axis, reduce=reduce)
4168+
return self._apply_standard(f, axis,
4169+
reduce=reduce,
4170+
ignore_failures=ignore_failures)
41684171
else:
41694172
return self._apply_broadcast(f, axis)
41704173

@@ -4968,7 +4971,11 @@ def f(x):
49684971
# this can end up with a non-reduction
49694972
# but not always. if the types are mixed
49704973
# with datelike then need to make sure a series
4971-
result = self.apply(f, reduce=False)
4974+
4975+
# we only end up here if we have not specified numeric_only
4976+
# and yet we have tried a column-by-column reduction, where
4977+
# we have mixed type. So let's just do what we can
4978+
result = self.apply(f, reduce=False, ignore_failures=True)
49724979
if result.ndim == self.ndim:
49734980
result = result.iloc[0]
49744981
return result

pandas/tests/frame/test_apply.py

+32-20
Original file line numberDiff line numberDiff line change
@@ -533,26 +533,10 @@ def f():
533533
self.frame.transform(['max', 'sqrt'])
534534
self.assertRaises(ValueError, f)
535535

536-
def test_agg_todo(self):
537-
# if we have a transforming function & a reducer
538-
# we have to resolve
539-
# TODO
540-
# result = self.frame.apply([np.sqrt, np.mean])
541-
542-
# TODO
543-
# difference in apply / agg semantics when passing a
544-
# row-wise aggregator
545-
pass
546-
547-
def test_broken(self):
548-
# TODO
549-
# df = pd.DataFrame({'A': range(5), 'B': 5})
550-
# result = df.agg({'A':['abs', 'sum'], 'B':['mean','max']})
551-
# expected = DataFrame({'A': [0, 4], 'B': [5, 5]},
552-
# columns=['A', 'B'],
553-
# index=['min', 'max'])
554-
# tm.assert_frame_equal(result, expected)
555-
pass
536+
df = pd.DataFrame({'A': range(5), 'B': 5})
537+
def f():
538+
with np.errstate(all='ignore'):
539+
df.agg({'A':['abs', 'sum'], 'B':['mean','max']})
556540

557541
def test_demo(self):
558542
# demonstration tests
@@ -604,3 +588,31 @@ def test_agg_reduce(self):
604588
self.frame.B.max()],
605589
index=['sum', 'max'])})
606590
assert_frame_equal(result.reindex_like(expected), expected)
591+
592+
def test_nuiscance_columns(self):
593+
594+
# GH 15015
595+
df = DataFrame({'A': [1, 2, 3],
596+
'B': [1., 2., 3.],
597+
'C': ['foo', 'bar', 'baz'],
598+
'D': pd.date_range('20130101', periods=3)})
599+
600+
result = df.agg('min')
601+
expected = Series([1, 1., 'bar', pd.Timestamp('20130101')],
602+
index=df.columns)
603+
assert_series_equal(result, expected)
604+
605+
result = df.agg(['min'])
606+
expected = DataFrame([[1, 1., 'bar', pd.Timestamp('20130101')]],
607+
index=['min'], columns=df.columns)
608+
assert_frame_equal(result, expected)
609+
610+
result = df.agg('sum')
611+
expected = Series([6, 6., 'foobarbaz'],
612+
index=['A', 'B', 'C'])
613+
assert_series_equal(result, expected)
614+
615+
result = df.agg(['sum'])
616+
expected = DataFrame([[6, 6., 'foobarbaz']],
617+
index=['sum'], columns=['A', 'B', 'C'])
618+
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)