Skip to content

Commit 24394e1

Browse files
committed
BUG: fix panel bin-grouping aggregation with custom functions. close #2537
1 parent e559e87 commit 24394e1

File tree

6 files changed

+72
-22
lines changed

6 files changed

+72
-22
lines changed

RELEASE.rst

+2
Original file line number | Diff line number | Diff line change
@@ -236,6 +236,7 @@ pandas 0.10.0
236236
- Fix several DataFrame.icol/irow with duplicate indices issues (GH2228_, GH2259_)
237237
- Use Series names for column names when using concat with axis=1 (GH2489_)
238238
- Raise Exception if start, end, periods all passed to date_range (GH2538_)
239+
- Fix Panel resampling issue (GH2537_)
239240

240241
.. _GH407: https://github.com/pydata/pandas/issues/407
241242
.. _GH821: https://github.com/pydata/pandas/issues/821
@@ -356,6 +357,7 @@ pandas 0.10.0
356357
.. _GH2259: https://github.com/pydata/pandas/issues/2259
357358
.. _GH2489: https://github.com/pydata/pandas/issues/2489
358359
.. _GH2538: https://github.com/pydata/pandas/issues/2538
360+
.. _GH2537: https://github.com/pydata/pandas/issues/2537
359361

360362

361363
pandas 0.9.1

pandas/core/groupby.py

+34-14
Original file line number | Diff line number | Diff line change
@@ -942,16 +942,26 @@ def get_iterator(self, data, axis=0):
942942
Generator yielding sequence of (name, subsetted object)
943943
for each group
944944
"""
945-
if axis == 1:
946-
raise NotImplementedError
945+
if axis == 0:
946+
start = 0
947+
for edge, label in izip(self.bins, self.binlabels):
948+
yield label, data[start:edge]
949+
start = edge
950+
951+
if edge < len(data):
952+
yield self.binlabels[-1], data[edge:]
953+
else:
954+
start = 0
955+
for edge, label in izip(self.bins, self.binlabels):
956+
inds = range(start, edge)
957+
yield label, data.take(inds, axis=axis)
958+
start = edge
947959

948-
start = 0
949-
for edge, label in izip(self.bins, self.binlabels):
950-
yield label, data[start:edge]
951-
start = edge
960+
n = len(data.axes[axis])
961+
if edge < n:
962+
inds = range(edge, n)
963+
yield self.binlabels[-1], data.take(inds, axis=axis)
952964

953-
if edge < len(data):
954-
yield self.binlabels[-1], data[edge:]
955965

956966
def apply(self, f, data, axis=0, keep_internal=False):
957967
result_keys = []
@@ -1652,8 +1662,9 @@ def _aggregate_generic(self, func, *args, **kwargs):
16521662
result = {}
16531663
if axis != obj._het_axis:
16541664
try:
1655-
for name in self.indices:
1656-
data = self.get_group(name, obj=obj)
1665+
for name, data in self:
1666+
# for name in self.indices:
1667+
# data = self.get_group(name, obj=obj)
16571668
result[name] = func(data, *args, **kwargs)
16581669
except Exception:
16591670
return self._aggregate_item_by_item(func, *args, **kwargs)
@@ -1993,13 +2004,22 @@ def aggregate(self, arg, *args, **kwargs):
19932004
return self._aggregate_generic(arg, *args, **kwargs)
19942005

19952006
def _wrap_generic_output(self, result, obj):
1996-
new_axes = list(obj.axes)
1997-
new_axes[self.axis] = self.grouper.result_index
2007+
if self.axis == 0:
2008+
new_axes = list(obj.axes)
2009+
new_axes[0] = self.grouper.result_index
2010+
elif self.axis == 1:
2011+
x, y, z = obj.axes
2012+
new_axes = [self.grouper.result_index, z, x]
2013+
else:
2014+
x, y, z = obj.axes
2015+
new_axes = [self.grouper.result_index, y, x]
19982016

19992017
result = Panel._from_axes(result, new_axes)
20002018

2001-
if self.axis > 0:
2002-
result = result.swapaxes(0, self.axis)
2019+
if self.axis == 1:
2020+
result = result.swapaxes(0, 1).swapaxes(0, 2)
2021+
elif self.axis == 2:
2022+
result = result.swapaxes(0, 2)
20032023

20042024
return result
20052025

pandas/core/index.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1156,7 +1156,7 @@ def delete(self, loc):
11561156
-------
11571157
new_index : Index
11581158
"""
1159-
arr = np.delete(np.asarray(self), loc)
1159+
arr = np.delete(self.values, loc)
11601160
return Index(arr)
11611161

11621162
def insert(self, loc, item):

pandas/core/internals.py

+7-2
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,13 @@ def __init__(self, values, items, ref_items, ndim=2):
2727
if values.ndim != ndim:
2828
raise AssertionError('Wrong number of dimensions')
2929

30-
if len(items) != len(values):
31-
raise AssertionError('Wrong number of items passed')
30+
nitems = len(items)
31+
nvalues = len(values)
32+
if nitems != nvalues:
33+
import pdb
34+
pdb.set_trace()
35+
raise AssertionError('Wrong number of items passed (%d vs %d)'
36+
% (len(items), len(values)))
3237

3338
self._ref_locs = None
3439
self.values = values

pandas/core/panel.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -283,8 +283,9 @@ def _init_dict(self, data, axes, dtype=None):
283283
data[k] = self._constructor_sliced(v)
284284

285285
# extract axis for remaining axes & create the slicemap
286-
raxes = [ self._extract_axis(self, data, axis=i) if a is None else a for i, a in enumerate(axes) ]
287-
raxes_sm = self._extract_axes_for_slice(self, raxes)
286+
raxes = [self._extract_axis(self, data, axis=i)
287+
if a is None else a for i, a in enumerate(axes)]
288+
raxes_sm = self._extract_axes_for_slice(self, raxes)
288289

289290
# shallow copy
290291
arrays = []

pandas/tseries/tests/test_resample.py

+25-3
Original file line number | Diff line number | Diff line change
@@ -371,10 +371,15 @@ def test_resample_panel_numpy(self):
371371
major_axis=rng,
372372
minor_axis=['a', 'b', 'c', 'd', 'e'])
373373

374-
result = panel.resample('M', how=lambda x: x.mean(), axis=1)
374+
result = panel.resample('M', how=lambda x: x.mean(1), axis=1)
375375
expected = panel.resample('M', how='mean', axis=1)
376376
tm.assert_panel_equal(result, expected)
377377

378+
panel = panel.swapaxes(1, 2)
379+
result = panel.resample('M', how=lambda x: x.mean(2), axis=2)
380+
expected = panel.resample('M', how='mean', axis=2)
381+
tm.assert_panel_equal(result, expected)
382+
378383
def test_resample_anchored_ticks(self):
379384
# If a fixed delta (5 minute, 4 hour) evenly divides a day, we should
380385
# "anchor" the origin at midnight so we get regular intervals rather
@@ -1013,7 +1018,24 @@ def test_apply_iteration(self):
10131018
result = grouped.apply(f)
10141019
self.assertTrue(result.index.equals(df.index))
10151020

1021+
def test_panel_aggregation(self):
1022+
ind = pd.date_range('1/1/2000', periods=100)
1023+
data = np.random.randn(2,len(ind),4)
1024+
wp = pd.Panel(data, items=['Item1', 'Item2'], major_axis=ind,
1025+
minor_axis=['A', 'B', 'C', 'D'])
1026+
1027+
tg = TimeGrouper('M', axis=1)
1028+
grouper = tg.get_grouper(wp)
1029+
bingrouped = wp.groupby(grouper)
1030+
binagg = bingrouped.mean()
1031+
1032+
def f(x):
1033+
assert(isinstance(x, Panel))
1034+
return x.mean(1)
1035+
result = bingrouped.agg(f)
1036+
tm.assert_panel_equal(result, binagg)
1037+
1038+
10161039
if __name__ == '__main__':
1017-
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure',
1018-
'--with-timer'],
1040+
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
10191041
exit=False)

0 commit comments

Comments
 (0)