Skip to content

Commit 85dd837

Browse files
PERF: specialized 1D take version (#40246)
1 parent 3888a3f commit 85dd837

File tree

2 files changed

+40
-3
lines changed

2 files changed

+40
-3
lines changed

pandas/core/array_algos/take.py

+37
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,43 @@ def _take_nd_ndarray(
121121
return out
122122

123123

124+
def take_1d(
    arr: ArrayLike,
    indexer: np.ndarray,
    fill_value=None,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Take elements from a 1D array with as little overhead as possible.

    This is a trimmed-down counterpart of take_nd:

    - ``arr`` and ``indexer`` are assumed to already be a numpy array or
      ExtensionArray; no conversion is attempted here.
    - Only 1D input is supported.

    Anything that is not a numpy ndarray is handed to its own ``take``
    method; ndarrays go through the preprocessing helper and the take
    function selected for their dtype.

    TODO(ArrayManager): mainly useful for ArrayManager, otherwise can
    potentially be removed again if we don't end up with ArrayManager.
    """
    if isinstance(arr, np.ndarray):
        indexer, result_dtype, fill_value, mask_info = (
            _take_preprocess_indexer_and_fill_value(
                arr, indexer, 0, None, fill_value, allow_fill
            )
        )

        # The preprocessing step is expected to return a dtype able to hold
        # both the values of arr and the fill_value.
        result = np.empty(indexer.shape, dtype=result_dtype)

        take_func = _get_take_nd_function(
            arr.ndim, arr.dtype, result.dtype, axis=0, mask_info=mask_info
        )
        # The selected function writes into ``result`` in place.
        take_func(arr, indexer, result, fill_value)
        return result

    # ExtensionArray -> dispatch to its own take implementation
    return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
159+
160+
124161
def take_2d_multi(
125162
arr: np.ndarray, indexer: np.ndarray, fill_value=np.nan
126163
) -> np.ndarray:

pandas/core/internals/array_manager.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@
5858

5959
import pandas.core.algorithms as algos
6060
from pandas.core.array_algos.quantile import quantile_compat
61-
from pandas.core.array_algos.take import take_nd
61+
from pandas.core.array_algos.take import take_1d
6262
from pandas.core.arrays import (
6363
DatetimeArray,
6464
ExtensionArray,
@@ -991,7 +991,7 @@ def _reindex_indexer(
991991
else:
992992
validate_indices(indexer, len(self._axes[0]))
993993
new_arrays = [
994-
take_nd(
994+
take_1d(
995995
arr,
996996
indexer,
997997
allow_fill=True,
@@ -1073,7 +1073,7 @@ def unstack(self, unstacker, fill_value) -> ArrayManager:
10731073
new_arrays = []
10741074
for arr in self.arrays:
10751075
for i in range(unstacker.full_shape[1]):
1076-
new_arr = take_nd(
1076+
new_arr = take_1d(
10771077
arr, new_indexer2D[:, i], allow_fill=True, fill_value=fill_value
10781078
)
10791079
new_arrays.append(new_arr)

0 commit comments

Comments
 (0)