Skip to content

Commit 3e1cfc5

Browse files
PERF: passthrough mask to take_1d if already known (#44666)
1 parent cba9d45 commit 3e1cfc5

File tree

2 files changed

+46
-15
lines changed

2 files changed

+46
-15
lines changed

pandas/core/array_algos/take.py

+25-4
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,7 @@ def take_1d(
166166
indexer: npt.NDArray[np.intp],
167167
fill_value=None,
168168
allow_fill: bool = True,
169+
mask: npt.NDArray[np.bool_] | None = None,
169170
) -> ArrayLike:
170171
"""
171172
Specialized version for 1D arrays. Differences compared to `take_nd`:
@@ -178,6 +179,22 @@ def take_1d(
178179
179180
Note: similarly to `take_nd`, this function assumes that the indexer is
180181
a valid(ated) indexer with no out of bound indices.
182+
183+
Parameters
184+
----------
185+
arr : np.ndarray or ExtensionArray
186+
Input array.
187+
indexer : ndarray
188+
1-D array of indices to take (validated indices, intp dtype).
189+
fill_value : any, default np.nan
190+
Fill value to replace -1 values with
191+
allow_fill : bool, default True
192+
If False, indexer is assumed to contain no -1 values so no filling
193+
will be done. This short-circuits computation of a mask. Result is
194+
undefined if allow_fill == False and -1 is present in indexer.
195+
mask : np.ndarray, optional, default None
196+
If `allow_fill` is True, and the mask (where indexer == -1) is already
197+
known, it can be passed to avoid recomputation.
181198
"""
182199
if not isinstance(arr, np.ndarray):
183200
# ExtensionArray -> dispatch to their method
@@ -187,7 +204,7 @@ def take_1d(
187204
return arr.take(indexer)
188205

189206
dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
190-
arr, indexer, fill_value, True
207+
arr, indexer, fill_value, True, mask
191208
)
192209

193210
# at this point, it's guaranteed that dtype can hold both the arr values
@@ -533,8 +550,9 @@ def _take_preprocess_indexer_and_fill_value(
533550
indexer: npt.NDArray[np.intp],
534551
fill_value,
535552
allow_fill: bool,
553+
mask: npt.NDArray[np.bool_] | None = None,
536554
):
537-
mask_info = None
555+
mask_info: tuple[np.ndarray | None, bool] | None = None
538556

539557
if not allow_fill:
540558
dtype, fill_value = arr.dtype, arr.dtype.type()
@@ -545,8 +563,11 @@ def _take_preprocess_indexer_and_fill_value(
545563
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
546564
if dtype != arr.dtype:
547565
# check if promotion is actually required based on indexer
548-
mask = indexer == -1
549-
needs_masking = mask.any()
566+
if mask is not None:
567+
needs_masking = True
568+
else:
569+
mask = indexer == -1
570+
needs_masking = bool(mask.any())
550571
mask_info = mask, needs_masking
551572
if not needs_masking:
552573
# if not, then depromote, set fill_value to dummy

pandas/core/internals/array_manager.py

+21-11
Original file line number | Diff line number | Diff line change
@@ -610,16 +610,15 @@ def _reindex_indexer(
610610
else:
611611
validate_indices(indexer, len(self._axes[0]))
612612
indexer = ensure_platform_int(indexer)
613-
if (indexer == -1).any():
614-
allow_fill = True
615-
else:
616-
allow_fill = False
613+
mask = indexer == -1
614+
needs_masking = mask.any()
617615
new_arrays = [
618616
take_1d(
619617
arr,
620618
indexer,
621-
allow_fill=allow_fill,
619+
allow_fill=needs_masking,
622620
fill_value=fill_value,
621+
mask=mask,
623622
# if fill_value is not None else blk.fill_value
624623
)
625624
for arr in self.arrays
@@ -1056,22 +1055,33 @@ def unstack(self, unstacker, fill_value) -> ArrayManager:
10561055
if unstacker.mask.all():
10571056
new_indexer = indexer
10581057
allow_fill = False
1058+
new_mask2D = None
1059+
needs_masking = None
10591060
else:
10601061
new_indexer = np.full(unstacker.mask.shape, -1)
10611062
new_indexer[unstacker.mask] = indexer
10621063
allow_fill = True
1064+
# calculating the full mask once and passing it to take_1d is faster
1065+
# than letting take_1d calculate it in each repeated call
1066+
new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape)
1067+
needs_masking = new_mask2D.any(axis=0)
10631068
new_indexer2D = new_indexer.reshape(*unstacker.full_shape)
10641069
new_indexer2D = ensure_platform_int(new_indexer2D)
10651070

10661071
new_arrays = []
10671072
for arr in self.arrays:
10681073
for i in range(unstacker.full_shape[1]):
1069-
new_arr = take_1d(
1070-
arr,
1071-
new_indexer2D[:, i],
1072-
allow_fill=allow_fill,
1073-
fill_value=fill_value,
1074-
)
1074+
if allow_fill:
1075+
# error: Value of type "Optional[Any]" is not indexable [index]
1076+
new_arr = take_1d(
1077+
arr,
1078+
new_indexer2D[:, i],
1079+
allow_fill=needs_masking[i], # type: ignore[index]
1080+
fill_value=fill_value,
1081+
mask=new_mask2D[:, i], # type: ignore[index]
1082+
)
1083+
else:
1084+
new_arr = take_1d(arr, new_indexer2D[:, i], allow_fill=False)
10751085
new_arrays.append(new_arr)
10761086

10771087
new_index = unstacker.new_index

0 commit comments

Comments
 (0)