Skip to content

Commit 2d3a9b3

Browse files
committed
ENH: unstack multiple levels in one shot per #1181
1 parent 773d861 commit 2d3a9b3

File tree

6 files changed: 52 additions (+52) and 66 deletions (-66).

RELEASE.rst

+2 -0
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,8 @@ pandas 0.8.0
7070
- Fix segfault caused by empty groups passed to groupby (#1048)
7171
- Fix occasionally misbehaved reindexing in the presence of NaN labels (#522)
7272
- Fix imprecise logic causing weird Series results from .apply (#1183)
73+
- Unstack multiple levels in one shot, avoiding empty columns in some
74+
cases. Fix pivot table bug (#1181)
7375

7476
pandas 0.7.3
7577
============

pandas/core/frame.py

+1 -11
Original file line number | Diff line number | Diff line change
@@ -2992,17 +2992,7 @@ def unstack(self, level=-1):
29922992
unstacked : DataFrame or Series
29932993
"""
29942994
from pandas.core.reshape import unstack
2995-
if isinstance(level, (tuple, list)):
2996-
result = self
2997-
to_unstack = level
2998-
while to_unstack:
2999-
lev = to_unstack[0]
3000-
result = unstack(result, lev)
3001-
to_unstack = [other - 1 if other > lev else other
3002-
for other in to_unstack[1:]]
3003-
return result
3004-
else:
3005-
return unstack(self, level)
2995+
return unstack(self, level)
30062996

30072997
#----------------------------------------------------------------------
30082998
# Time series-related

pandas/core/reshape.py

+35 -33
Original file line number | Diff line number | Diff line change
@@ -209,37 +209,52 @@ def _unstack_multiple(data, clocs):
209209

210210
index = data.index
211211

212-
clevels, rlevels = _partition(index.levels, clocs)
213-
clabels, rlabels = _partition(index.labels, clocs)
214-
cnames, rnames = _partition(index.names, clocs)
212+
clocs = [index._get_level_number(i) for i in clocs]
213+
214+
rlocs = [i for i in range(index.nlevels) if i not in clocs]
215+
216+
clevels = [index.levels[i] for i in clocs]
217+
clabels = [index.labels[i] for i in clocs]
218+
cnames = [index.names[i] for i in clocs]
219+
rlevels = [index.levels[i] for i in rlocs]
220+
rlabels = [index.labels[i] for i in rlocs]
221+
rnames = [index.names[i] for i in rlocs]
215222

216223
shape = [len(x) for x in clevels]
217224
group_index = get_group_index(clabels, shape)
218225

219226
comp_ids, obs_ids = _compress_group_index(group_index, sort=False)
227+
recons_labels = decons_group_index(obs_ids, shape)
220228

221229
dummy_index = MultiIndex(levels=rlevels + [obs_ids],
222230
labels=rlabels + [comp_ids],
223231
names=rnames + ['__placeholder__'])
224232

225-
dummy = DataFrame(data.values, index=dummy_index,
226-
columns=data.columns)
227-
228-
unstacked = dummy.unstack('__placeholder__')
229-
230-
if isinstance(unstacked, Series):
231-
unstcols = unstacked.index
233+
if isinstance(data, Series):
234+
dummy = Series(data.values, index=dummy_index)
235+
unstacked = dummy.unstack('__placeholder__')
236+
new_levels = clevels
237+
new_names = cnames
238+
new_labels = recons_labels
232239
else:
233-
unstcols = unstacked.columns
240+
if isinstance(data.columns, MultiIndex):
241+
raise NotImplementedError('Unstacking multiple levels with '
242+
'hierarchical columns not yet supported')
234243

235-
new_levels = [unstcols.levels[0]] + clevels
236-
new_names = [data.columns.name] + cnames
244+
dummy = DataFrame(data.values, index=dummy_index,
245+
columns=data.columns)
237246

238-
recons_labels = decons_group_index(obs_ids, shape)
247+
unstacked = dummy.unstack('__placeholder__')
248+
if isinstance(unstacked, Series):
249+
unstcols = unstacked.index
250+
else:
251+
unstcols = unstacked.columns
252+
new_levels = [unstcols.levels[0]] + clevels
253+
new_names = [data.columns.name] + cnames
239254

240-
new_labels = [unstcols.labels[0]]
241-
for rec in recons_labels:
242-
new_labels.append(rec.take(unstcols.labels[-1]))
255+
new_labels = [unstcols.labels[0]]
256+
for rec in recons_labels:
257+
new_labels.append(rec.take(unstcols.labels[-1]))
243258

244259
new_columns = MultiIndex(levels=new_levels, labels=new_labels,
245260
names=new_names)
@@ -251,22 +266,6 @@ def _unstack_multiple(data, clocs):
251266

252267
return unstacked
253268

254-
255-
def _partition(values, inds):
256-
left = []
257-
right = []
258-
259-
set_inds = set(inds)
260-
261-
for i, val in enumerate(values):
262-
if i in set_inds:
263-
left.append(val)
264-
else:
265-
right.append(val)
266-
267-
return left, right
268-
269-
270269
def pivot(self, index=None, columns=None, values=None):
271270
"""
272271
See DataFrame.pivot
@@ -351,6 +350,9 @@ def _slow_pivot(index, columns, values):
351350
return DataFrame(tree)
352351

353352
def unstack(obj, level):
353+
if isinstance(level, (tuple, list)):
354+
return _unstack_multiple(obj, level)
355+
354356
if isinstance(obj, DataFrame):
355357
if isinstance(obj.index, MultiIndex):
356358
return _unstack_frame(obj, level)

pandas/core/series.py

+1 -7
Original file line number | Diff line number | Diff line change
@@ -1814,13 +1814,7 @@ def unstack(self, level=-1):
18141814
unstacked : DataFrame
18151815
"""
18161816
from pandas.core.reshape import unstack
1817-
if isinstance(level, (tuple, list)):
1818-
result = self
1819-
for lev in level:
1820-
result = unstack(result, lev)
1821-
return result
1822-
else:
1823-
return unstack(self, level)
1817+
return unstack(self, level)
18241818

18251819
#----------------------------------------------------------------------
18261820
# function application

pandas/tests/test_multilevel.py

+10 -10
Original file line number | Diff line number | Diff line change
@@ -607,15 +607,15 @@ def test_unstack(self):
607607
# test that ints work
608608
unstacked = self.ymd.astype(int).unstack()
609609

610-
# def test_unstack_multiple_no_empty_columns(self):
611-
# index = MultiIndex.from_tuples([(0, 'foo', 0), (0, 'bar', 0),
612-
# (1, 'baz', 1), (1, 'qux', 1)])
610+
def test_unstack_multiple_no_empty_columns(self):
611+
index = MultiIndex.from_tuples([(0, 'foo', 0), (0, 'bar', 0),
612+
(1, 'baz', 1), (1, 'qux', 1)])
613613

614-
# s = Series(np.random.randn(4), index=index)
614+
s = Series(np.random.randn(4), index=index)
615615

616-
# unstacked = s.unstack([1, 2])
617-
# expected = unstacked.dropna(axis=1, how='all')
618-
# assert_frame_equal(unstacked, expected)
616+
unstacked = s.unstack([1, 2])
617+
expected = unstacked.dropna(axis=1, how='all')
618+
assert_frame_equal(unstacked, expected)
619619

620620
def test_stack(self):
621621
# regular roundtrip
@@ -738,12 +738,12 @@ def test_stack_unstack_multiple(self):
738738

739739
# GH #451
740740
unstacked = self.ymd.unstack([1, 2])
741-
expected = self.ymd.unstack(1).unstack(1)
741+
expected = self.ymd.unstack(1).unstack(1).dropna(axis=1, how='all')
742742
assert_frame_equal(unstacked, expected)
743743

744744
unstacked = self.ymd.unstack([2, 1])
745-
expected = self.ymd.unstack(2).unstack(1)
746-
assert_frame_equal(unstacked, expected)
745+
expected = self.ymd.unstack(2).unstack(1).dropna(axis=1, how='all')
746+
assert_frame_equal(unstacked, expected.ix[:, unstacked.columns])
747747

748748
def test_groupby_transform(self):
749749
s = self.frame['A']

pandas/tools/pivot.py

+3 -5
Original file line number | Diff line number | Diff line change
@@ -98,12 +98,10 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean',
9898
grouped = data.groupby(keys)
9999
agged = grouped.agg(aggfunc)
100100

101-
table = _unstack_multiple(agged, range(len(rows), len(keys)))
101+
to_unstack = [agged.index.names[i]
102+
for i in range(len(rows), len(keys))]
102103

103-
# table = agged
104-
# for i in range(len(cols)):
105-
# name = table.index.names[len(rows)]
106-
# table = table.unstack(name)
104+
table = agged.unstack(to_unstack)
107105

108106
if fill_value is not None:
109107
table = table.fillna(value=fill_value)

0 commit comments

Comments
 (0)