Skip to content

Commit 4e5e1a6

Browse files
jbrockmendel authored and feefladder committed
PERF: unstack (pandas-dev#43335)
1 parent eb0dc73 commit 4e5e1a6

File tree

2 files changed

+20
-7
lines changed

2 files changed

+20
-7
lines changed

pandas/core/internals/blocks.py

+2 -5
Original file line numberDiff line numberDiff line change
@@ -1653,12 +1653,9 @@ def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
16531653
# converting to a 2-D ndarray of objects.
16541654
# Instead, we unstack an ndarray of integer positions, followed by
16551655
# a `take` on the actual values.
1656-
n_rows = self.shape[-1]
1657-
dummy_arr = np.arange(n_rows)
16581656

1659-
new_values, mask = unstacker.get_new_values(dummy_arr, fill_value=-1)
1660-
mask = mask.any(0)
1661-
# TODO: in all tests we have mask.all(); can we rely on that?
1657+
# Caller is responsible for ensuring self.shape[-1] == len(unstacker.index)
1658+
new_values, mask = unstacker.arange_result
16621659

16631660
# Note: these next two lines ensure that
16641661
# mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)

pandas/core/reshape/reshape.py

+18 -2
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,18 @@ def _make_selectors(self):
189189
self.unique_groups = obs_ids
190190
self.compressor = comp_index.searchsorted(np.arange(ngroups))
191191

192+
@cache_readonly
193+
def mask_all(self) -> bool:
194+
return bool(self.mask.all())
195+
196+
@cache_readonly
197+
def arange_result(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.bool_]]:
198+
# We cache this for re-use in ExtensionBlock._unstack
199+
dummy_arr = np.arange(len(self.index), dtype=np.intp)
200+
new_values, mask = self.get_new_values(dummy_arr, fill_value=-1)
201+
return new_values, mask.any(0)
202+
# TODO: in all tests we have mask.any(0).all(); can we rely on that?
203+
192204
def get_result(self, values, value_columns, fill_value):
193205

194206
if values.ndim == 1:
@@ -216,7 +228,7 @@ def get_new_values(self, values, fill_value=None):
216228
result_width = width * stride
217229
result_shape = (length, result_width)
218230
mask = self.mask
219-
mask_all = mask.all()
231+
mask_all = self.mask_all
220232

221233
# we can simply reshape if we don't have a mask
222234
if mask_all and len(values):
@@ -510,7 +522,11 @@ def _unstack_extension_series(series, level, fill_value):
510522
# Defer to the logic in ExtensionBlock._unstack
511523
df = series.to_frame()
512524
result = df.unstack(level=level, fill_value=fill_value)
513-
return result.droplevel(level=0, axis=1)
525+
526+
# equiv: result.droplevel(level=0, axis=1)
527+
# but this avoids an extra copy
528+
result.columns = result.columns.droplevel(0)
529+
return result
514530

515531

516532
def stack(frame, level=-1, dropna=True):

0 commit comments

Comments
 (0)