From f0da7e0f9861e02c34d84e0dd8b5b48b9a679ff2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 5 Mar 2021 09:38:52 +0100 Subject: [PATCH 1/3] PERF: specialized 1D take version --- pandas/core/array_algos/take.py | 37 ++++++++++++++++++++++++++ pandas/core/internals/array_manager.py | 4 +-- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index 2179085d9ad9b..a9b07a67377bd 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -121,6 +121,43 @@ def _take_nd_ndarray( return out +def take_1d( + arr: ArrayLike, + indexer: np.ndarray, + fill_value=None, + allow_fill: bool = True, +): + """ + Specialized version for 1D arrays. Differences compared to take_nd: + + - Assumes input (arr, indexer) has already been converted to numpy array / EA + - Only works for 1D arrays + + To ensure the lowest possible overhead. + + TODO(ArrayManager): mainly useful for ArrayManager, otherwise can potentially + be removed again if we don't end up with ArrayManager. + """ + if not isinstance(arr, np.ndarray): + # ExtensionArray -> dispatch to their method + return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + + indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value( + arr, indexer, 0, None, fill_value, allow_fill + ) + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + out = np.empty(indexer.shape, dtype=dtype) + + func = _get_take_nd_function( + arr.ndim, arr.dtype, out.dtype, axis=0, mask_info=mask_info + ) + func(arr, indexer, out, fill_value) + + return out + + def take_2d_multi( arr: np.ndarray, indexer: np.ndarray, fill_value=np.nan ) -> np.ndarray: diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 0449be84bdcf7..43f686cdc8197 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -58,7 +58,7 @@ import pandas.core.algorithms as algos from pandas.core.array_algos.quantile import quantile_compat -from pandas.core.array_algos.take import take_nd +from pandas.core.array_algos.take import take_1d from pandas.core.arrays import ( DatetimeArray, ExtensionArray, @@ -1048,7 +1048,7 @@ def unstack(self, unstacker, fill_value) -> ArrayManager: new_arrays = [] for arr in self.arrays: for i in range(unstacker.full_shape[1]): - new_arr = take_nd( + new_arr = take_1d( arr, new_indexer2D[:, i], allow_fill=True, fill_value=fill_value ) new_arrays.append(new_arr) From 825d2bdc16d1a2df2f2ef8cd2c50fccc1f140ea4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 5 Mar 2021 16:14:27 +0100 Subject: [PATCH 2/3] type annotation --- pandas/core/array_algos/take.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index a9b07a67377bd..054497089c5ab 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -126,7 +126,7 @@ def take_1d( indexer: np.ndarray, fill_value=None, allow_fill: bool = True, -): +) -> ArrayLike: """ Specialized version for 1D arrays. Differences compared to take_nd: From e6ce02ab50a71ccb994c0db9dc6f1cbb95b64496 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 5 Mar 2021 20:08:30 +0100 Subject: [PATCH 3/3] use take_1d in reindex --- pandas/core/internals/array_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index ff03384a5d5a8..905fa448ff033 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -991,7 +991,7 @@ def _reindex_indexer( else: validate_indices(indexer, len(self._axes[0])) new_arrays = [ - take_nd( + take_1d( arr, indexer, allow_fill=True,