Skip to content

Commit 9c15c5c

Browse files
committed
fix nuisance column / string aggs
closes pandas-dev#15015
1 parent 20256e9 commit 9c15c5c

File tree

5 files changed

+83
-23
lines changed

5 files changed

+83
-23
lines changed

doc/source/basics.rst

+21-1
Original file line numberDiff line numberDiff line change
@@ -934,7 +934,27 @@ the names of the functions.
934934
935935
tsdf.A.agg({'foo' : ['sum', 'mean'], 'bar': ['min', 'max', lambda x: x.sum()+1]})
936936
937-
.. _basics.custom_describe:
937+
.. _basics.aggregation.mixed_dtypes:
938+
939+
Mixed Dtypes
940+
++++++++++++
941+
942+
When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid
943+
aggregations. This is similar to how groupby ``.agg`` works.
944+
945+
.. ipython:: python
946+
947+
mdf = pd.DataFrame({'A': [1, 2, 3],
948+
'B': [1., 2., 3.],
949+
'C': ['foo', 'bar', 'baz'],
950+
'D': pd.date_range('20130101', periods=3)})
951+
mdf.dtypes
952+
953+
.. ipython:: python
954+
955+
mdf.agg(['min', 'sum'])
956+
957+
.. _basics.aggregation.custom_describe:
938958

939959
Custom describe
940960
+++++++++++++++

doc/source/whatsnew/v0.20.0.txt

+14
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,20 @@ The API also supports a ``.transform()`` function to provide for broadcasting re
7777

7878
df.transform(['abs', lambda x: x-x.min()])
7979

80+
When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid
81+
aggregations. This is similar to how groupby ``.agg`` works. (:issue:`15015`)
82+
83+
.. ipython:: python
84+
85+
df = pd.DataFrame({'A': [1, 2, 3],
86+
'B': [1., 2., 3.],
87+
'C': ['foo', 'bar', 'baz'],
88+
'D': pd.date_range('20130101', periods=3)})
89+
df.dtypes
90+
91+
.. ipython:: python
92+
93+
df.agg(['min', 'sum'])
8094

8195
.. _whatsnew_0200.enhancements.dataio_dtype:
8296

pandas/core/base.py

+3
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,9 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis):
683683
keys.append(col)
684684
except (TypeError, DataError):
685685
pass
686+
except ValueError:
687+
# cannot aggregate
688+
continue
686689
except SpecificationError:
687690
raise
688691

pandas/core/frame.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -4128,6 +4128,7 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None,
41284128
applied : Series or DataFrame
41294129
"""
41304130
axis = self._get_axis_number(axis)
4131+
ignore_failures = kwds.pop('ignore_failures', False)
41314132

41324133
# dispatch to agg
41334134
if axis == 0 and isinstance(func, (list, dict)):
@@ -4164,7 +4165,10 @@ def f(x):
41644165
else:
41654166
if reduce is None:
41664167
reduce = True
4167-
return self._apply_standard(f, axis, reduce=reduce)
4168+
return self._apply_standard(
4169+
f, axis,
4170+
reduce=reduce,
4171+
ignore_failures=ignore_failures)
41684172
else:
41694173
return self._apply_broadcast(f, axis)
41704174

@@ -4968,7 +4972,13 @@ def f(x):
49684972
# this can end up with a non-reduction
49694973
# but not always. if the types are mixed
49704974
# with datelike then need to make sure a series
4971-
result = self.apply(f, reduce=False)
4975+
4976+
# we only end up here if we have not specified
4977+
# numeric_only and yet we have tried a
4978+
# column-by-column reduction, where we have mixed type.
4979+
# So let's just do what we can
4980+
result = self.apply(f, reduce=False,
4981+
ignore_failures=True)
49724982
if result.ndim == self.ndim:
49734983
result = result.iloc[0]
49744984
return result

pandas/tests/frame/test_apply.py

+33-20
Original file line numberDiff line numberDiff line change
@@ -533,26 +533,11 @@ def f():
533533
self.frame.transform(['max', 'sqrt'])
534534
self.assertRaises(ValueError, f)
535535

536-
def test_agg_todo(self):
537-
# if we have a transforming function & a reducer
538-
# we have to resolve
539-
# TODO
540-
# result = self.frame.apply([np.sqrt, np.mean])
541-
542-
# TODO
543-
# difference in apply / agg semantics when passing a
544-
# row-wise aggregator
545-
pass
546-
547-
def test_broken(self):
548-
# TODO
549-
# df = pd.DataFrame({'A': range(5), 'B': 5})
550-
# result = df.agg({'A':['abs', 'sum'], 'B':['mean','max']})
551-
# expected = DataFrame({'A': [0, 4], 'B': [5, 5]},
552-
# columns=['A', 'B'],
553-
# index=['min', 'max'])
554-
# tm.assert_frame_equal(result, expected)
555-
pass
536+
df = pd.DataFrame({'A': range(5), 'B': 5})
537+
538+
def f():
539+
with np.errstate(all='ignore'):
540+
df.agg({'A': ['abs', 'sum'], 'B': ['mean', 'max']})
556541

557542
def test_demo(self):
558543
# demonstration tests
@@ -604,3 +589,31 @@ def test_agg_reduce(self):
604589
self.frame.B.max()],
605590
index=['sum', 'max'])})
606591
assert_frame_equal(result.reindex_like(expected), expected)
592+
593+
def test_nuiscance_columns(self):
594+
595+
# GH 15015
596+
df = DataFrame({'A': [1, 2, 3],
597+
'B': [1., 2., 3.],
598+
'C': ['foo', 'bar', 'baz'],
599+
'D': pd.date_range('20130101', periods=3)})
600+
601+
result = df.agg('min')
602+
expected = Series([1, 1., 'bar', pd.Timestamp('20130101')],
603+
index=df.columns)
604+
assert_series_equal(result, expected)
605+
606+
result = df.agg(['min'])
607+
expected = DataFrame([[1, 1., 'bar', pd.Timestamp('20130101')]],
608+
index=['min'], columns=df.columns)
609+
assert_frame_equal(result, expected)
610+
611+
result = df.agg('sum')
612+
expected = Series([6, 6., 'foobarbaz'],
613+
index=['A', 'B', 'C'])
614+
assert_series_equal(result, expected)
615+
616+
result = df.agg(['sum'])
617+
expected = DataFrame([[6, 6., 'foobarbaz']],
618+
index=['sum'], columns=['A', 'B', 'C'])
619+
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)