From 7c33dc2b94ea64cf53683800fbb32e224386e17e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 9 Apr 2020 09:55:33 -0700 Subject: [PATCH 1/2] REF: move shared calls up the stack --- pandas/core/internals/blocks.py | 10 ++-------- pandas/core/internals/managers.py | 5 ++++- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d12b78f8d046f..ba9bc3427f711 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1385,15 +1385,13 @@ def equals(self, other) -> bool: return False return array_equivalent(self.values, other.values) - def _unstack(self, unstacker, new_columns, fill_value, value_columns): + def _unstack(self, unstacker, fill_value, new_placement): """ Return a list of unstacked blocks of self Parameters ---------- unstacker : reshape._Unstacker - new_columns : Index - All columns of the unstacked BlockManager. fill_value : int Only used in ExtensionBlock._unstack @@ -1404,8 +1402,6 @@ def _unstack(self, unstacker, new_columns, fill_value, value_columns): mask : array_like of bool The mask of columns of `blocks` we should keep. """ - new_items = unstacker.get_new_columns(value_columns) - new_placement = new_columns.get_indexer(new_items) new_values, mask = unstacker.get_new_values( self.values.T, fill_value=fill_value ) @@ -1846,7 +1842,7 @@ def where( return [self.make_block_same_class(result, placement=self.mgr_locs)] - def _unstack(self, unstacker, new_columns, fill_value, value_columns): + def _unstack(self, unstacker, fill_value, new_placement): # ExtensionArray-safe unstack. # We override ObjectBlock._unstack, which unstacks directly on the # values of the array. For EA-backed blocks, this would require @@ -1856,8 +1852,6 @@ def _unstack(self, unstacker, new_columns, fill_value, value_columns): n_rows = self.shape[-1] dummy_arr = np.arange(n_rows) - new_items = unstacker.get_new_columns(value_columns) - new_placement = new_columns.get_indexer(new_items) new_values, mask = unstacker.get_new_values(dummy_arr, fill_value=-1) mask = mask.any(0) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index bfb16b48d832c..6c6630c6b4a71 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1452,8 +1452,11 @@ def unstack(self, unstacker, fill_value) -> "BlockManager": for blk in self.blocks: blk_cols = self.items[blk.mgr_locs.indexer] + new_items = unstacker.get_new_columns(blk_cols) + new_placement = new_columns.get_indexer(new_items) + blocks, mask = blk._unstack( - unstacker, new_columns, fill_value, value_columns=blk_cols, + unstacker, fill_value, new_placement=new_placement ) new_blocks.extend(blocks) From 8f0e5d469c8eefeba2ccd10b586ce89fbb698c25 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Apr 2020 19:30:02 -0700 Subject: [PATCH 2/2] Comments --- pandas/core/internals/blocks.py | 5 ++++- pandas/core/reshape/reshape.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f314d09208ba6..185b0f4da2627 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1406,10 +1406,12 @@ def _unstack(self, unstacker, fill_value, new_placement): ) mask = mask.any(0) + # TODO: in all tests we have mask.all(); can we rely on that? + new_values = new_values.T[mask] new_placement = new_placement[mask] - blocks = [make_block(new_values, placement=new_placement)] + blocks = [self.make_block_same_class(new_values, placement=new_placement)] return blocks, mask def quantile(self, qs, interpolation="linear", axis: int = 0): @@ -1886,6 +1888,7 @@ def _unstack(self, unstacker, fill_value, new_placement): new_values, mask = unstacker.get_new_values(dummy_arr, fill_value=-1) mask = mask.any(0) + # TODO: in all tests we have mask.all(); can we rely on that? blocks = [ self.make_block_same_class( diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b883c5b1568a0..882e3e0a649cc 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -142,7 +142,7 @@ def sorted_labels(self): indexer, to_sort = self._indexer_and_to_sort return [l.take(indexer) for l in to_sort] - def _make_sorted_values(self, values): + def _make_sorted_values(self, values: np.ndarray) -> np.ndarray: indexer, _ = self._indexer_and_to_sort sorted_values = algos.take_nd(values, indexer, axis=0) @@ -205,6 +205,9 @@ def get_new_values(self, values, fill_value=None): # we can simply reshape if we don't have a mask if mask_all and len(values): + # TODO: Under what circumstances can we rely on sorted_values + # matching values? When that holds, we can slice instead + # of take (in particular for EAs) new_values = ( sorted_values.reshape(length, width, stride) .swapaxes(1, 2)