diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d8875b38ed738..185b0f4da2627 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1384,15 +1384,13 @@ def equals(self, other) -> bool: return False return array_equivalent(self.values, other.values) - def _unstack(self, unstacker, new_columns, fill_value, value_columns): + def _unstack(self, unstacker, fill_value, new_placement): """ Return a list of unstacked blocks of self Parameters ---------- unstacker : reshape._Unstacker - new_columns : Index - All columns of the unstacked BlockManager. fill_value : int Only used in ExtensionBlock._unstack @@ -1403,17 +1401,17 @@ def _unstack(self, unstacker, new_columns, fill_value, value_columns): mask : array_like of bool The mask of columns of `blocks` we should keep. """ - new_items = unstacker.get_new_columns(value_columns) - new_placement = new_columns.get_indexer(new_items) new_values, mask = unstacker.get_new_values( self.values.T, fill_value=fill_value ) mask = mask.any(0) + # TODO: in all tests we have mask.all(); can we rely on that? + new_values = new_values.T[mask] new_placement = new_placement[mask] - blocks = [make_block(new_values, placement=new_placement)] + blocks = [self.make_block_same_class(new_values, placement=new_placement)] return blocks, mask def quantile(self, qs, interpolation="linear", axis: int = 0): @@ -1878,7 +1876,7 @@ def where( return [self.make_block_same_class(result, placement=self.mgr_locs)] - def _unstack(self, unstacker, new_columns, fill_value, value_columns): + def _unstack(self, unstacker, fill_value, new_placement): # ExtensionArray-safe unstack. # We override ObjectBlock._unstack, which unstacks directly on the # values of the array. For EA-backed blocks, this would require @@ -1888,10 +1886,9 @@ def _unstack(self, unstacker, new_columns, fill_value, value_columns): n_rows = self.shape[-1] dummy_arr = np.arange(n_rows) - new_items = unstacker.get_new_columns(value_columns) - new_placement = new_columns.get_indexer(new_items) new_values, mask = unstacker.get_new_values(dummy_arr, fill_value=-1) mask = mask.any(0) + # TODO: in all tests we have mask.all(); can we rely on that? blocks = [ self.make_block_same_class( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f3b4ebad9cec1..0f105b0a7ee75 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1461,8 +1461,11 @@ def unstack(self, unstacker, fill_value) -> "BlockManager": for blk in self.blocks: blk_cols = self.items[blk.mgr_locs.indexer] + new_items = unstacker.get_new_columns(blk_cols) + new_placement = new_columns.get_indexer(new_items) + blocks, mask = blk._unstack( - unstacker, new_columns, fill_value, value_columns=blk_cols, + unstacker, fill_value, new_placement=new_placement ) new_blocks.extend(blocks) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b883c5b1568a0..882e3e0a649cc 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -142,7 +142,7 @@ def sorted_labels(self): indexer, to_sort = self._indexer_and_to_sort return [l.take(indexer) for l in to_sort] - def _make_sorted_values(self, values): + def _make_sorted_values(self, values: np.ndarray) -> np.ndarray: indexer, _ = self._indexer_and_to_sort sorted_values = algos.take_nd(values, indexer, axis=0) @@ -205,6 +205,9 @@ def get_new_values(self, values, fill_value=None): # we can simply reshape if we don't have a mask if mask_all and len(values): + # TODO: Under what circumstances can we rely on sorted_values + # matching values? When that holds, we can slice instead + # of take (in particular for EAs) new_values = ( sorted_values.reshape(length, width, stride) .swapaxes(1, 2)