Skip to content

Commit 9e188c1

Browse files
committed
BUG: Fix/test SparseSeries/SparseDataFrame stack/unstack
1 parent 10c17d4 commit 9e188c1

File tree

3 files changed

+56
-12
lines changed

3 files changed

+56
-12
lines changed

doc/source/whatsnew/v0.20.2.txt

+3
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ Bug Fixes
5252
- Bug in :func:`cut` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`)
5353
- Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on ``Categoricals`` (:issue:`16409`)
5454

55+
- Bug in ``SparseSeries.unstack()`` and ``SparseDataFrame.stack()`` (:issue:`16614`, :issue:`15045`)
56+
57+
5558
Conversion
5659
^^^^^^^^^^
5760

pandas/core/reshape/reshape.py

+30-12
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pandas.core.dtypes.common import (
1111
_ensure_platform_int,
1212
is_list_like, is_bool_dtype,
13-
needs_i8_conversion)
13+
needs_i8_conversion, is_sparse)
1414
from pandas.core.dtypes.cast import maybe_promote
1515
from pandas.core.dtypes.missing import notnull
1616
import pandas.core.dtypes.concat as _concat
@@ -75,10 +75,15 @@ def __init__(self, values, index, level=-1, value_columns=None,
7575
fill_value=None):
7676

7777
self.is_categorical = None
78+
self.is_sparse = is_sparse(values)
7879
if values.ndim == 1:
7980
if isinstance(values, Categorical):
8081
self.is_categorical = values
8182
values = np.array(values)
83+
elif self.is_sparse:
84+
# XXX: Makes SparseArray *dense*, but it's supposedly
85+
# a single column at a time, so it's "doable"
86+
values = values.values
8287
values = values[:, np.newaxis]
8388
self.values = values
8489
self.value_columns = value_columns
@@ -177,7 +182,8 @@ def get_result(self):
177182
ordered=ordered)
178183
for i in range(values.shape[-1])]
179184

180-
return DataFrame(values, index=index, columns=columns)
185+
klass = SparseDataFrame if self.is_sparse else DataFrame
186+
return klass(values, index=index, columns=columns)
181187

182188
def get_new_values(self):
183189
values = self.values
@@ -463,15 +469,15 @@ def _unstack_frame(obj, level, fill_value=None):
463469
from pandas.core.internals import BlockManager, make_block
464470

465471
if obj._is_mixed_type:
466-
unstacker = _Unstacker(np.empty(obj.shape, dtype=bool), # dummy
472+
unstacker = _Unstacker(np.empty((0, 0)), # dummy
467473
obj.index, level=level,
468474
value_columns=obj.columns)
469475
new_columns = unstacker.get_new_columns()
470476
new_index = unstacker.get_new_index()
471477
new_axes = [new_columns, new_index]
472478

473479
new_blocks = []
474-
mask_blocks = []
480+
mask_blocks = np.zeros_like(new_columns, dtype=bool)
475481
for blk in obj._data.blocks:
476482
blk_items = obj._data.items[blk.mgr_locs.indexer]
477483
bunstacker = _Unstacker(blk.values.T, obj.index, level=level,
@@ -481,15 +487,25 @@ def _unstack_frame(obj, level, fill_value=None):
481487
new_placement = new_columns.get_indexer(new_items)
482488
new_values, mask = bunstacker.get_new_values()
483489

484-
mblk = make_block(mask.T, placement=new_placement)
485-
mask_blocks.append(mblk)
490+
mask_blocks[new_placement] = mask.any(0)
486491

487-
newb = make_block(new_values.T, placement=new_placement)
488-
new_blocks.append(newb)
492+
# BlockManager can't handle SparseBlocks with multiple items,
493+
# so let's make one block for each item
494+
if is_sparse(blk.values):
495+
new_placement = [[i] for i in new_placement]
496+
new_values = new_values.T
497+
else:
498+
new_placement = [new_placement]
499+
new_values = [new_values.T]
500+
501+
for cols, placement in zip(new_values, new_placement):
502+
newb = blk.make_block_same_class(cols, placement=placement)
503+
new_blocks.append(newb)
489504

490-
result = DataFrame(BlockManager(new_blocks, new_axes))
491-
mask_frame = DataFrame(BlockManager(mask_blocks, new_axes))
492-
return result.loc[:, mask_frame.sum(0) > 0]
505+
klass = type(obj)
506+
assert klass in (SparseDataFrame, DataFrame), klass
507+
result = klass(BlockManager(new_blocks, new_axes))
508+
return result.loc[:, mask_blocks]
493509
else:
494510
unstacker = _Unstacker(obj.values, obj.index, level=level,
495511
value_columns=obj.columns,
@@ -550,7 +566,9 @@ def factorize(index):
550566
mask = notnull(new_values)
551567
new_values = new_values[mask]
552568
new_index = new_index[mask]
553-
return Series(new_values, index=new_index)
569+
570+
klass = SparseSeries if isinstance(frame, SparseDataFrame) else Series
571+
return klass(new_values, index=new_index)
554572

555573

556574
def stack_multiple(frame, level, dropna=True):

pandas/tests/test_multilevel.py

+23
Original file line numberDiff line numberDiff line change
@@ -2381,6 +2381,29 @@ def test_iloc_mi(self):
23812381
tm.assert_frame_equal(result, expected)
23822382

23832383

2384+
class TestSparse(object):
2385+
def setup_method(self, method):
2386+
self.sdf = pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye
2387+
self.mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
2388+
2389+
def test_sparse_frame_stack(self):
2390+
ss = self.sdf.stack()
2391+
expected = pd.SparseSeries(np.ones(3), index=self.mi)
2392+
tm.assert_sp_series_equal(ss, expected)
2393+
2394+
def test_sparse_frame_unstack(self):
2395+
mi = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)])
2396+
sdf = self.sdf
2397+
sdf.index = mi
2398+
df = pd.DataFrame(np.eye(3), index=mi).replace(0, np.nan)
2399+
2400+
tm.assert_numpy_array_equal(df.unstack().values, sdf.unstack().values)
2401+
2402+
def test_sparse_series_unstack(self):
2403+
frame = pd.SparseSeries(np.ones(3), index=self.mi).unstack()
2404+
tm.assert_sp_frame_equal(frame, self.sdf)
2405+
2406+
23842407
class TestSorted(Base):
23852408
""" everything you wanted to test about sorting """
23862409

0 commit comments

Comments
 (0)