Skip to content

Commit 20256e9

Browse files
committed
return MI series
1 parent 3d36a99 commit 20256e9

File tree

5 files changed

+88
-41
lines changed

5 files changed

+88
-41
lines changed

doc/source/basics.rst

+6-4
Original file line numberDiff line numberDiff line change
@@ -890,7 +890,7 @@ Multiple functions yield multiple rows.
890890
891891
tsdf.agg(['sum', 'mean'])
892892
893-
On a Series, multiple functions return a Series.
893+
On a Series, multiple functions return a Series, indexed by the function names.
894894

895895
.. ipython:: python
896896
@@ -914,7 +914,8 @@ of all of the aggregators; some may be missing values.
914914
915915
tsdf.agg({'A': ['mean', 'min'], 'B': 'sum'})
916916
917-
For a Series, you can pass a dict; the keys will set the name of the column
917+
For a Series, you can pass a dict. You will get back a MultiIndex Series; the outer level will
918+
be the keys, the inner the names of the functions.
918919

919920
.. ipython:: python
920921
@@ -924,9 +925,10 @@ Alternatively, using multiple dictionaries, you can have renamed elements with t
924925

925926
.. ipython:: python
926927
927-
tsdf.A.agg({'foo' : 'sum', 'bar':'mean'})
928+
tsdf.A.agg({'foo' : 'sum', 'bar': 'mean'})
928929
929-
Multiple keys will yield multiple columns.
930+
Multiple keys will yield a MultiIndex Series. The outer level will be the keys, the inner
931+
the names of the functions.
930932

931933
.. ipython:: python
932934

doc/source/whatsnew/v0.20.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ Dictionaries to provide the ability to selective calculation.
6464
df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
6565

6666
When operating on a Series, passing a dictionary allows one to rename multiple
67-
function aggregates; this will return a DataFrame.
67+
function aggregates; this will return a MultiIndexed Series. The outer level
68+
consists of the keys, the inner level of the names of the functions.
6869

6970
.. ipython:: python
7071

pandas/core/base.py

+54-12
Original file line numberDiff line numberDiff line change
@@ -581,18 +581,53 @@ def _agg(arg, func):
581581
result = _agg(arg, _agg_2dim)
582582

583583
# combine results
584+
585+
def is_any_series():
586+
# return a boolean if we have *any* nested series
587+
return any([isinstance(r, ABCSeries)
588+
for r in compat.itervalues(result)])
589+
590+
def is_any_frame():
591+
# return a boolean if we have *any* nested frames
592+
return any([isinstance(r, ABCDataFrame)
593+
for r in compat.itervalues(result)])
594+
584595
if isinstance(result, list):
585-
result = concat(result, keys=keys, axis=1)
586-
elif isinstance(list(compat.itervalues(result))[0],
587-
ABCDataFrame):
588-
result = concat([result[k] for k in keys], keys=keys, axis=1)
589-
else:
590-
from pandas import DataFrame, Series
596+
return concat(result, keys=keys, axis=1), True
597+
598+
elif is_any_frame():
599+
# we have a dict of DataFrames
600+
# return a MI DataFrame
601+
602+
return concat([result[k] for k in keys],
603+
keys=keys, axis=1), True
604+
605+
elif isinstance(self, ABCSeries) and is_any_series():
606+
607+
# we have a dict of Series
608+
# return a MI Series
591609
try:
592-
result = DataFrame(result)
593-
except ValueError:
594-
# we have a dict of scalars
595-
result = Series(result, name=getattr(self, 'name', None))
610+
result = concat(result)
611+
except TypeError:
612+
# we want to give a nice error here if
613+
# we have non-same sized objects, so
614+
# we don't automatically broadcast
615+
616+
raise ValueError("cannot perform both aggregation "
617+
"and transformation operations "
618+
"simultaneously")
619+
620+
return result, True
621+
622+
# fall thru
623+
from pandas import DataFrame, Series
624+
try:
625+
result = DataFrame(result)
626+
except ValueError:
627+
628+
# we have a dict of scalars
629+
result = Series(result,
630+
name=getattr(self, 'name', None))
596631

597632
return result, True
598633
elif is_list_like(arg) and arg not in compat.string_types:
@@ -651,10 +686,17 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis):
651686
except SpecificationError:
652687
raise
653688

689+
# if we are empty
690+
if not len(results):
691+
raise ValueError("no results")
692+
654693
try:
655694
return concat(results, keys=keys, axis=1)
656-
except (TypeError, ValueError):
657-
# shape change
695+
except TypeError:
696+
697+
# we are concatting non-NDFrame objects,
698+
# e.g. a list of scalars
699+
658700
from pandas.types.cast import _is_nested_object
659701
from pandas import Series
660702
result = Series(results, index=keys, name=self.name)

pandas/tests/series/test_apply.py

+22-22
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,10 @@ def test_transform(self):
189189
# dict, provide renaming
190190
expected = pd.concat([f_sqrt, f_abs], axis=1)
191191
expected.columns = ['foo', 'bar']
192+
expected = expected.unstack().rename('series')
193+
192194
result = self.series.apply({'foo': np.sqrt, 'bar': np.abs})
193-
assert_frame_equal(result.reindex_like(expected), expected)
195+
assert_series_equal(result.reindex_like(expected), expected)
194196

195197
def test_transform_and_agg_error(self):
196198
# we are trying to transform with an aggregator
@@ -208,43 +210,41 @@ def f():
208210
self.series.transform(['sqrt', 'max'])
209211
self.assertRaises(ValueError, f)
210212

211-
def test_todo(self):
212-
# TODO
213-
# This should raise, column selection is not possible here
214-
# and renaming doesn't make sense
215-
# result = self.series.apply({'foo': ['sqrt', 'log']})
216-
217-
# TODO
218-
# difference in apply / agg semantics when passing a
219-
# row-wise aggregator
220-
pass
213+
def f():
214+
with np.errstate(all='ignore'):
215+
self.series.agg({'foo': np.sqrt, 'bar': 'sum'})
216+
self.assertRaises(ValueError, f)
221217

222218
def test_demo(self):
223219
# demonstration tests
224-
s = Series(range(6), dtype='int64')
220+
s = Series(range(6), dtype='int64', name='series')
225221

226222
result = s.agg(['min', 'max'])
227-
expected = Series([0, 5], index=['min', 'max'])
223+
expected = Series([0, 5], index=['min', 'max'], name='series')
228224
tm.assert_series_equal(result, expected)
229225

230226
result = s.agg({'foo': 'min'})
231-
expected = Series([0], index=['foo'])
227+
expected = Series([0], index=['foo'], name='series')
232228
tm.assert_series_equal(result, expected)
233229

234230
result = s.agg({'foo': ['min', 'max']})
235-
expected = DataFrame({'foo': [0, 5]}, index=['min', 'max'])
236-
tm.assert_frame_equal(result, expected)
231+
expected = DataFrame(
232+
{'foo': [0, 5]},
233+
index=['min', 'max']).unstack().rename('series')
234+
tm.assert_series_equal(result, expected)
237235

238236
def test_multiple_aggregators_with_dict_api(self):
239237

240-
s = Series(range(6), dtype='int64')
238+
s = Series(range(6), dtype='int64', name='series')
241239
result = s.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']})
242240

243-
expected = DataFrame({'foo': [5.0, np.nan, 0.0, np.nan],
244-
'bar': [np.nan, 2.5, np.nan, 15.0]},
245-
columns=['foo', 'bar'],
246-
index=['max', 'mean', 'min', 'sum'])
247-
tm.assert_frame_equal(result.reindex_like(expected), expected)
241+
expected = DataFrame(
242+
{'foo': [5.0, np.nan, 0.0, np.nan],
243+
'bar': [np.nan, 2.5, np.nan, 15.0]},
244+
columns=['foo', 'bar'],
245+
index=['max', 'mean',
246+
'min', 'sum']).unstack().rename('series')
247+
tm.assert_series_equal(result.reindex_like(expected), expected)
248248

249249
def test_agg_apply_evaluate_lambdas_the_same(self):
250250
# test that we are evaluating row-by-row first

pandas/tseries/tests/test_resample.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1777,10 +1777,12 @@ def test_how_lambda_functions(self):
17771777
tm.assert_series_equal(result['foo'], foo_exp)
17781778
tm.assert_series_equal(result['bar'], bar_exp)
17791779

1780+
# this is a MI Series, so comparing the names of the results
1781+
# doesn't make sense
17801782
result = ts.resample('M').aggregate({'foo': lambda x: x.mean(),
17811783
'bar': lambda x: x.std(ddof=1)})
1782-
tm.assert_series_equal(result['foo'], foo_exp)
1783-
tm.assert_series_equal(result['bar'], bar_exp)
1784+
tm.assert_series_equal(result['foo'], foo_exp, check_names=False)
1785+
tm.assert_series_equal(result['bar'], bar_exp, check_names=False)
17841786

17851787
def test_resample_unequal_times(self):
17861788
# #1772

0 commit comments

Comments
 (0)