Skip to content

Commit ed15e1e

Browse files
committed
Move .unstack() logic onto BlockManager and Block
1 parent 1fdbadf commit ed15e1e

File tree

3 files changed

+38
-40
lines changed

3 files changed

+38
-40
lines changed

pandas/core/categorical.py

+2
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,8 @@ def maybe_to_categorical(array):
127127
""" coerce to a categorical if a series is given """
128128
if isinstance(array, (ABCSeries, ABCCategoricalIndex)):
129129
return array._values
130+
elif isinstance(array, np.ndarray):
131+
return Categorical(array)
130132
return array
131133

132134

pandas/core/internals.py

+31
Original file line number | Diff line number | Diff line change
@@ -1467,6 +1467,10 @@ def equals(self, other):
14671467
return False
14681468
return array_equivalent(self.values, other.values)
14691469

1470+
def _unstack(self, new_values, new_placement):
1471+
"""Return a list of unstacked blocks of self"""
1472+
return [make_block(new_values, placement=new_placement)]
1473+
14701474
def quantile(self, qs, interpolation='linear', axis=0, mgr=None):
14711475
"""
14721476
compute the quantiles of the
@@ -1703,6 +1707,12 @@ def _slice(self, slicer):
17031707
def _try_cast_result(self, result, dtype=None):
17041708
return result
17051709

1710+
def _unstack(self, new_values, new_placement):
1711+
# NonConsolidatable blocks can have a single item only, so we return
1712+
# one block per item
1713+
return [self.make_block_same_class(vals, [place])
1714+
for vals, place in zip(new_values, new_placement)]
1715+
17061716

17071717
class NumericBlock(Block):
17081718
__slots__ = ()
@@ -4158,6 +4168,27 @@ def canonicalize(block):
41584168
return all(block.equals(oblock)
41594169
for block, oblock in zip(self_blocks, other_blocks))
41604170

4171+
def unstack(self, unstacker):
4172+
"""Return blockmanager with all blocks unstacked"""
4173+
dummy = unstacker(np.empty((0, 0)), value_columns=self.items)
4174+
new_columns = dummy.get_new_columns()
4175+
new_index = dummy.get_new_index()
4176+
new_blocks = []
4177+
mask_columns = np.zeros_like(new_columns, dtype=bool)
4178+
4179+
for blk in self.blocks:
4180+
bunstacker = unstacker(
4181+
blk.values.T, value_columns=self.items[blk.mgr_locs.indexer])
4182+
new_items = bunstacker.get_new_columns()
4183+
new_values, mask = bunstacker.get_new_values()
4184+
new_placement = new_columns.get_indexer(new_items)
4185+
mask_columns[new_placement] = mask.any(0)
4186+
new_blocks.extend(blk._unstack(new_values.T, new_placement))
4187+
4188+
bm = BlockManager(new_blocks, [new_columns, new_index])
4189+
bm = bm.take(mask_columns.nonzero()[0], axis=0)
4190+
return bm
4191+
41614192

41624193
class SingleBlockManager(BlockManager):
41634194
""" manage a single block with """

pandas/core/reshape/reshape.py

+5 -40
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
# pylint: disable=W0703,W0622,W0613,W0201
33
from pandas.compat import range, zip
44
from pandas import compat
5+
from functools import partial
56
import itertools
67
import re
78

@@ -466,48 +467,12 @@ def unstack(obj, level, fill_value=None):
466467

467468

468469
def _unstack_frame(obj, level, fill_value=None):
469-
from pandas.core.internals import BlockManager, make_block as _make_block
470-
471470
if obj._is_mixed_type:
472-
unstacker = _Unstacker(np.empty((0, 0)), # dummy
473-
obj.index, level=level,
474-
value_columns=obj.columns)
475-
new_columns = unstacker.get_new_columns()
476-
new_index = unstacker.get_new_index()
477-
new_axes = [new_columns, new_index]
478-
479-
new_blocks = []
480-
mask_blocks = np.zeros_like(new_columns, dtype=bool)
481-
for blk in obj._data.blocks:
482-
blk_items = obj._data.items[blk.mgr_locs.indexer]
483-
bunstacker = _Unstacker(blk.values.T, obj.index, level=level,
484-
value_columns=blk_items,
485-
fill_value=fill_value)
486-
new_items = bunstacker.get_new_columns()
487-
new_placement = new_columns.get_indexer(new_items)
488-
new_values, mask = bunstacker.get_new_values()
489-
490-
mask_blocks[new_placement] = mask.any(0)
491-
492-
# BlockManager can't handle SparseBlocks with multiple items,
493-
# so lets make one block for each item
494-
if is_sparse(blk.values):
495-
new_placement = [[i] for i in new_placement]
496-
new_values = new_values.T
497-
make_block = blk.make_block_same_class
498-
else:
499-
new_placement = [new_placement]
500-
new_values = [new_values.T]
501-
make_block = _make_block
502-
503-
for cols, placement in zip(new_values, new_placement):
504-
newb = make_block(cols, placement=placement)
505-
new_blocks.append(newb)
506-
471+
unstacker = partial(_Unstacker, index=obj.index,
472+
level=level, fill_value=fill_value)
473+
blocks = obj._data.unstack(unstacker)
507474
klass = type(obj)
508-
assert klass in (SparseDataFrame, DataFrame), klass
509-
result = klass(BlockManager(new_blocks, new_axes))
510-
return result.loc[:, mask_blocks]
475+
return klass(blocks)
511476
else:
512477
unstacker = _Unstacker(obj.values, obj.index, level=level,
513478
value_columns=obj.columns,

0 commit comments

Comments
 (0)