Skip to content

Commit 550ddbd

Browse files
committed
Move .unstack() logic onto BlockManager and Block
1 parent 01629e4 commit 550ddbd

File tree

3 files changed

+38
-40
lines changed

3 files changed

+38
-40
lines changed

pandas/core/categorical.py

+2
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ def maybe_to_categorical(array):
127127
""" coerce to a categorical if a series is given """
128128
if isinstance(array, (ABCSeries, ABCCategoricalIndex)):
129129
return array._values
130+
elif isinstance(array, np.ndarray):
131+
return Categorical(array)
130132
return array
131133

132134

pandas/core/internals.py

+31
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,10 @@ def equals(self, other):
14631463
return False
14641464
return array_equivalent(self.values, other.values)
14651465

1466+
def _unstack(self, new_values, new_placement):
    """Wrap the unstacked values in a single new block.

    The result is a one-element list holding the block that ``make_block``
    builds from ``new_values`` positioned at ``new_placement``.
    """
    unstacked = make_block(new_values, placement=new_placement)
    return [unstacked]
1469+
14661470
def quantile(self, qs, interpolation='linear', axis=0, mgr=None):
14671471
"""
14681472
compute the quantiles of the
@@ -1706,6 +1710,12 @@ def _slice(self, slicer):
17061710
def _try_cast_result(self, result, dtype=None):
17071711
return result
17081712

1713+
def _unstack(self, new_values, new_placement):
1714+
# NonConsolidatable blocks can have a single item only, so we return
1715+
# one block per item
1716+
return [self.make_block_same_class(vals, [place])
1717+
for vals, place in zip(new_values, new_placement)]
1718+
17091719

17101720
class NumericBlock(Block):
17111721
__slots__ = ()
@@ -4161,6 +4171,27 @@ def canonicalize(block):
41614171
return all(block.equals(oblock)
41624172
for block, oblock in zip(self_blocks, other_blocks))
41634173

4174+
def unstack(self, unstacker):
    """Unstack every block and return the resulting BlockManager.

    ``unstacker`` is called as ``unstacker(values, value_columns=...)``;
    the object it returns is expected to provide ``get_new_columns``,
    ``get_new_index`` and ``get_new_values``.
    """
    # An unstacker built over an empty array is only used to obtain the
    # axes of the result.
    axes_probe = unstacker(np.empty((0, 0)), value_columns=self.items)
    result_columns = axes_probe.get_new_columns()
    result_index = axes_probe.get_new_index()
    unstacked_blocks = []
    # Flags the result columns for which any mask entry is set.
    keep = np.zeros_like(result_columns, dtype=bool)

    for block in self.blocks:
        transposed = block.values.T
        block_items = self.items[block.mgr_locs.indexer]
        block_unstacker = unstacker(transposed, value_columns=block_items)
        unstacked_items = block_unstacker.get_new_columns()
        values, mask = block_unstacker.get_new_values()
        # Locate this block's unstacked items within the result columns.
        placement = result_columns.get_indexer(unstacked_items)
        keep[placement] = mask.any(0)
        unstacked_blocks.extend(block._unstack(values.T, placement))

    manager = BlockManager(unstacked_blocks, [result_columns, result_index])
    # Retain only the flagged columns.
    return manager.take(keep.nonzero()[0], axis=0)
4194+
41644195

41654196
class SingleBlockManager(BlockManager):
41664197
""" manage a single block with """

pandas/core/reshape/reshape.py

+5-40
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint: disable=W0703,W0622,W0613,W0201
33
from pandas.compat import range, zip
44
from pandas import compat
5+
from functools import partial
56
import itertools
67
import re
78

@@ -466,48 +467,12 @@ def unstack(obj, level, fill_value=None):
466467

467468

468469
def _unstack_frame(obj, level, fill_value=None):
469-
from pandas.core.internals import BlockManager, make_block as _make_block
470-
471470
if obj._is_mixed_type:
472-
unstacker = _Unstacker(np.empty((0, 0)), # dummy
473-
obj.index, level=level,
474-
value_columns=obj.columns)
475-
new_columns = unstacker.get_new_columns()
476-
new_index = unstacker.get_new_index()
477-
new_axes = [new_columns, new_index]
478-
479-
new_blocks = []
480-
mask_blocks = np.zeros_like(new_columns, dtype=bool)
481-
for blk in obj._data.blocks:
482-
blk_items = obj._data.items[blk.mgr_locs.indexer]
483-
bunstacker = _Unstacker(blk.values.T, obj.index, level=level,
484-
value_columns=blk_items,
485-
fill_value=fill_value)
486-
new_items = bunstacker.get_new_columns()
487-
new_placement = new_columns.get_indexer(new_items)
488-
new_values, mask = bunstacker.get_new_values()
489-
490-
mask_blocks[new_placement] = mask.any(0)
491-
492-
# BlockManager can't handle SparseBlocks with multiple items,
493-
# so lets make one block for each item
494-
if is_sparse(blk.values):
495-
new_placement = [[i] for i in new_placement]
496-
new_values = new_values.T
497-
make_block = blk.make_block_same_class
498-
else:
499-
new_placement = [new_placement]
500-
new_values = [new_values.T]
501-
make_block = _make_block
502-
503-
for cols, placement in zip(new_values, new_placement):
504-
newb = make_block(cols, placement=placement)
505-
new_blocks.append(newb)
506-
471+
unstacker = partial(_Unstacker, index=obj.index,
472+
level=level, fill_value=fill_value)
473+
blocks = obj._data.unstack(unstacker)
507474
klass = type(obj)
508-
assert klass in (SparseDataFrame, DataFrame), klass
509-
result = klass(BlockManager(new_blocks, new_axes))
510-
return result.loc[:, mask_blocks]
475+
return klass(blocks)
511476
else:
512477
unstacker = _Unstacker(obj.values, obj.index, level=level,
513478
value_columns=obj.columns,

0 commit comments

Comments
 (0)