Skip to content

Commit 2cda234

Browse files
committed
ENH: hack job but panel resampling with NumPy function works, close #1149
1 parent 0a9d496 commit 2cda234

File tree

3 files changed

+93
-54
lines changed

3 files changed

+93
-54
lines changed

pandas/core/groupby.py

+90-52
Original file line number | Diff line number | Diff line change
@@ -1421,7 +1421,7 @@ def _aggregate_multiple_funcs(self, arg):
14211421
try:
14221422
colg = SeriesGroupBy(obj[col], column=col,
14231423
grouper=self.grouper)
1424-
results.append(colg.agg(arg))
1424+
results.append(colg.aggregate(arg))
14251425
keys.append(col)
14261426
except (TypeError, GroupByError):
14271427
pass
@@ -1437,7 +1437,7 @@ def _aggregate_generic(self, func, *args, **kwargs):
14371437
obj = self._obj_with_exclusions
14381438

14391439
result = {}
1440-
if axis == 0:
1440+
if axis != obj._het_axis:
14411441
try:
14421442
for name in self.indices:
14431443
data = self.get_group(name, obj=obj)
@@ -1453,19 +1453,10 @@ def _aggregate_generic(self, func, *args, **kwargs):
14531453
wrapper = lambda x: func(x, *args, **kwargs)
14541454
result[name] = data.apply(wrapper, axis=axis)
14551455

1456-
result_index = self.grouper.levels[0]
1457-
1458-
if result:
1459-
if axis == 0:
1460-
result = DataFrame(result, index=obj.columns,
1461-
columns=result_index).T
1462-
else:
1463-
result = DataFrame(result, index=obj.index,
1464-
columns=result_index)
1465-
else:
1466-
result = DataFrame(result)
1456+
return self._wrap_generic_output(result, obj)
14671457

1468-
return result
1458+
def _wrap_aggregated_output(self, output, names=None):
1459+
raise NotImplementedError
14691460

14701461
def _aggregate_item_by_item(self, func, *args, **kwargs):
14711462
# only for axis==0
@@ -1477,7 +1468,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs):
14771468
try:
14781469
colg = SeriesGroupBy(obj[item], column=item,
14791470
grouper=self.grouper)
1480-
result[item] = colg.agg(func, *args, **kwargs)
1471+
result[item] = colg.aggregate(func, *args, **kwargs)
14811472
except (ValueError, TypeError):
14821473
cannot_agg.append(item)
14831474
continue
@@ -1488,39 +1479,21 @@ def _aggregate_item_by_item(self, func, *args, **kwargs):
14881479

14891480
return DataFrame(result, columns=result_columns)
14901481

1491-
def _wrap_aggregated_output(self, output, names=None):
1492-
agg_axis = 0 if self.axis == 1 else 1
1493-
agg_labels = self._obj_with_exclusions._get_axis(agg_axis)
1494-
1495-
if len(output) == len(agg_labels):
1496-
output_keys = agg_labels
1482+
def _decide_output_index(self, output, labels):
1483+
if len(output) == len(labels):
1484+
output_keys = labels
14971485
else:
14981486
output_keys = sorted(output)
14991487
try:
15001488
output_keys.sort()
15011489
except Exception: # pragma: no cover
15021490
pass
15031491

1504-
if isinstance(agg_labels, MultiIndex):
1492+
if isinstance(labels, MultiIndex):
15051493
output_keys = MultiIndex.from_tuples(output_keys,
1506-
names=agg_labels.names)
1507-
1508-
if not self.as_index:
1509-
result = DataFrame(output, columns=output_keys)
1510-
group_levels = self.grouper.get_group_levels()
1511-
zipped = zip(self.grouper.names, group_levels)
1494+
names=labels.names)
15121495

1513-
for i, (name, labels) in enumerate(zipped):
1514-
result.insert(i, name, labels)
1515-
result = result.consolidate()
1516-
else:
1517-
index = self.grouper.result_index
1518-
result = DataFrame(output, index=index, columns=output_keys)
1519-
1520-
if self.axis == 1:
1521-
result = result.T
1522-
1523-
return result
1496+
return output_keys
15241497

15251498
def _wrap_applied_output(self, keys, values, not_indexed_same=False):
15261499
if len(keys) == 0:
@@ -1640,13 +1613,51 @@ def __getitem__(self, key):
16401613
exclusions=self.exclusions,
16411614
as_index=self.as_index)
16421615

1616+
def _wrap_generic_output(self, result, obj):
1617+
result_index = self.grouper.levels[0]
1618+
1619+
if result:
1620+
if self.axis == 0:
1621+
result = DataFrame(result, index=obj.columns,
1622+
columns=result_index).T
1623+
else:
1624+
result = DataFrame(result, index=obj.index,
1625+
columns=result_index)
1626+
else:
1627+
result = DataFrame(result)
1628+
1629+
return result
1630+
16431631
def _get_data_to_aggregate(self):
16441632
obj = self._obj_with_exclusions
16451633
if self.axis == 1:
16461634
return obj.T._data, 1
16471635
else:
16481636
return obj._data, 1
16491637

1638+
def _wrap_aggregated_output(self, output, names=None):
1639+
agg_axis = 0 if self.axis == 1 else 1
1640+
agg_labels = self._obj_with_exclusions._get_axis(agg_axis)
1641+
1642+
output_keys = self._decide_output_index(output, agg_labels)
1643+
1644+
if not self.as_index:
1645+
result = DataFrame(output, columns=output_keys)
1646+
group_levels = self.grouper.get_group_levels()
1647+
zipped = zip(self.grouper.names, group_levels)
1648+
1649+
for i, (name, labels) in enumerate(zipped):
1650+
result.insert(i, name, labels)
1651+
result = result.consolidate()
1652+
else:
1653+
index = self.grouper.result_index
1654+
result = DataFrame(output, index=index, columns=output_keys)
1655+
1656+
if self.axis == 1:
1657+
result = result.T
1658+
1659+
return result
1660+
16501661
def _post_process_cython_aggregate(self, obj):
16511662
# undoing kludge from below
16521663
if self.axis == 0:
@@ -1733,31 +1744,58 @@ def aggregate(self, arg, *args, **kwargs):
17331744

17341745
return self._aggregate_generic(arg, *args, **kwargs)
17351746

1736-
def _aggregate_generic(self, func, *args, **kwargs):
1737-
result = {}
1747+
def _wrap_generic_output(self, result, obj):
17381748

1739-
axis = self.axis
1749+
new_axes = list(obj.axes)
1750+
new_axes[self.axis] = self.grouper.result_index
1751+
1752+
result = Panel._from_axes(result, new_axes)
1753+
1754+
if self.axis > 0:
1755+
result = result.swapaxes(0, self.axis)
1756+
1757+
return result
17401758

1759+
def _aggregate_item_by_item(self, func, *args, **kwargs):
17411760
obj = self._obj_with_exclusions
1761+
result = {}
1762+
cannot_agg = []
17421763

1743-
for name in self.grouper:
1744-
data = self.get_group(name, obj=obj)
1745-
try:
1746-
result[name] = func(data, *args, **kwargs)
1747-
except Exception:
1748-
wrapper = lambda x: func(x, *args, **kwargs)
1749-
result[name] = data.apply(wrapper, axis=axis)
1764+
if self.axis > 0:
1765+
for item in obj:
1766+
try:
1767+
itemg = DataFrameGroupBy(obj[item],
1768+
axis=self.axis - 1,
1769+
grouper=self.grouper)
1770+
result[item] = itemg.aggregate(func, *args, **kwargs)
1771+
except (ValueError, TypeError):
1772+
raise
1773+
# cannot_agg.append(item)
1774+
# continue
1775+
new_axes = list(obj.axes)
1776+
new_axes[self.axis] = self.grouper.result_index
1777+
return Panel._from_axes(result, new_axes)
1778+
else:
1779+
raise NotImplementedError
17501780

1751-
result = Panel(result)
1781+
def _wrap_aggregated_output(self, output, names=None):
1782+
raise NotImplementedError
1783+
new_axes = list(self._obj_with_exclusions.axes)
1784+
new_axes[self.axis] = self.grouper.result_index
17521785

1753-
if axis > 0:
1754-
result = result.swapaxes(0, axis)
1786+
result = Panel(output, index=self.grouper.result_index,
1787+
columns=output_keys)
1788+
1789+
if self.axis > 0:
1790+
result = result.swapaxes(0, self.axis)
17551791

17561792
return result
17571793

1794+
17581795
class NDArrayGroupBy(GroupBy):
17591796
pass
17601797

1798+
17611799
#----------------------------------------------------------------------
17621800
# Grouping generator for BlockManager
17631801

pandas/tseries/resample.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ def _resample_timestamps(self, obj):
9898
# downsamples
9999
if len(grouper.binlabels) < len(axlabels):
100100
grouped = obj.groupby(grouper, axis=self.axis)
101-
result = grouped.agg(self.how)
101+
result = grouped.aggregate(self.how)
102102
else:
103103
assert(self.axis == 0)
104104
# upsampling
@@ -139,7 +139,7 @@ def _resample_periods(self, obj):
139139
grouper = BinGrouper(bins, new_index)
140140

141141
grouped = obj.groupby(grouper, axis=self.axis)
142-
return grouped.agg(self.how)
142+
return grouped.aggregate(self.how)
143143
elif is_superperiod(axlabels.freq, self.freq):
144144
# Get the fill indexer
145145
indexer = memb.get_indexer(new_index, method=self.fill_method,

pandas/tseries/tests/test_resample.py

+1
Original file line number | Diff line number | Diff line change
@@ -347,6 +347,7 @@ def test_resample_panel_numpy(self):
347347
expected = panel.resample('M', how='mean', axis=1)
348348
tm.assert_panel_equal(result, expected)
349349

350+
350351
def _simple_ts(start, end, freq='D'):
351352
rng = date_range(start, end, freq=freq)
352353
return Series(np.random.randn(len(rng)), index=rng)

0 commit comments

Comments
 (0)