Skip to content

Commit c2f1271

Browse files
authored
REV: Revert #40510 (#40818)
1 parent 9f668bd commit c2f1271

File tree

7 files changed

+63
-44
lines changed

7 files changed

+63
-44
lines changed

pandas/core/array_algos/take.py

+45-34
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,14 @@ def take_nd(
100100
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
101101

102102
arr = np.asarray(arr)
103-
return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill)
103+
return _take_nd_ndarray(arr, indexer, axis, None, fill_value, allow_fill)
104104

105105

106106
def _take_nd_ndarray(
107107
arr: np.ndarray,
108108
indexer,
109109
axis: int,
110+
out: np.ndarray | None,
110111
fill_value,
111112
allow_fill: bool,
112113
) -> np.ndarray:
@@ -117,11 +118,8 @@ def _take_nd_ndarray(
117118
else:
118119
indexer = ensure_platform_int(indexer)
119120

120-
if not allow_fill:
121-
return arr.take(indexer, axis=axis)
122-
123-
dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
124-
arr, indexer, fill_value
121+
indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
122+
arr, indexer, out, fill_value, allow_fill
125123
)
126124

127125
flip_order = False
@@ -131,20 +129,23 @@ def _take_nd_ndarray(
131129
if flip_order:
132130
arr = arr.T
133131
axis = arr.ndim - axis - 1
132+
if out is not None:
133+
out = out.T
134134

135135
# at this point, it's guaranteed that dtype can hold both the arr values
136136
# and the fill_value
137-
out_shape_ = list(arr.shape)
138-
out_shape_[axis] = len(indexer)
139-
out_shape = tuple(out_shape_)
140-
if arr.flags.f_contiguous and axis == arr.ndim - 1:
141-
# minor tweak that can make an order-of-magnitude difference
142-
# for dataframes initialized directly from 2-d ndarrays
143-
# (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its
144-
# f-contiguous transpose)
145-
out = np.empty(out_shape, dtype=dtype, order="F")
146-
else:
147-
out = np.empty(out_shape, dtype=dtype)
137+
if out is None:
138+
out_shape_ = list(arr.shape)
139+
out_shape_[axis] = len(indexer)
140+
out_shape = tuple(out_shape_)
141+
if arr.flags.f_contiguous and axis == arr.ndim - 1:
142+
# minor tweak that can make an order-of-magnitude difference
143+
# for dataframes initialized directly from 2-d ndarrays
144+
# (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its
145+
# f-contiguous transpose)
146+
out = np.empty(out_shape, dtype=dtype, order="F")
147+
else:
148+
out = np.empty(out_shape, dtype=dtype)
148149

149150
func = _get_take_nd_function(
150151
arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info
@@ -188,8 +189,8 @@ def take_1d(
188189
if not allow_fill:
189190
return arr.take(indexer)
190191

191-
dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
192-
arr, indexer, fill_value
192+
indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
193+
arr, indexer, None, fill_value, True
193194
)
194195

195196
# at this point, it's guaranteed that dtype can hold both the arr values
@@ -515,22 +516,32 @@ def _take_2d_multi_object(
515516
def _take_preprocess_indexer_and_fill_value(
516517
arr: np.ndarray,
517518
indexer: np.ndarray,
519+
out: np.ndarray | None,
518520
fill_value,
521+
allow_fill: bool,
519522
):
520523
mask_info = None
521524

522-
# check for promotion based on types only (do this first because
523-
# it's faster than computing a mask)
524-
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
525-
if dtype != arr.dtype:
526-
# check if promotion is actually required based on indexer
527-
mask = indexer == -1
528-
needs_masking = mask.any()
529-
mask_info = mask, needs_masking
530-
if not needs_masking:
531-
# if not, then depromote, set fill_value to dummy
532-
# (it won't be used but we don't want the cython code
533-
# to crash when trying to cast it to dtype)
534-
dtype, fill_value = arr.dtype, arr.dtype.type()
535-
536-
return dtype, fill_value, mask_info
525+
if not allow_fill:
526+
dtype, fill_value = arr.dtype, arr.dtype.type()
527+
mask_info = None, False
528+
else:
529+
# check for promotion based on types only (do this first because
530+
# it's faster than computing a mask)
531+
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
532+
if dtype != arr.dtype:
533+
# check if promotion is actually required based on indexer
534+
mask = indexer == -1
535+
needs_masking = mask.any()
536+
mask_info = mask, needs_masking
537+
if needs_masking:
538+
if out is not None and out.dtype != dtype:
539+
raise TypeError("Incompatible type for fill_value")
540+
else:
541+
# if not, then depromote, set fill_value to dummy
542+
# (it won't be used but we don't want the cython code
543+
# to crash when trying to cast it to dtype)
544+
dtype, fill_value = arr.dtype, arr.dtype.type()
545+
546+
indexer = ensure_platform_int(indexer)
547+
return indexer, dtype, fill_value, mask_info

pandas/core/groupby/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1140,7 +1140,7 @@ def _numba_prep(self, func, data):
11401140
)
11411141
labels, _, n_groups = self.grouper.group_info
11421142
sorted_index = get_group_index_sorter(labels, n_groups)
1143-
sorted_labels = labels.take(sorted_index)
1143+
sorted_labels = algorithms.take_nd(labels, sorted_index, allow_fill=False)
11441144

11451145
sorted_data = data.take(sorted_index, axis=self.axis).to_numpy()
11461146

pandas/core/groupby/ops.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
maybe_fill,
6868
)
6969

70+
from pandas.core import algorithms
7071
from pandas.core.arrays import ExtensionArray
7172
from pandas.core.base import SelectionMixin
7273
import pandas.core.common as com
@@ -765,7 +766,7 @@ def _aggregate_series_fast(self, obj: Series, func: F):
765766
# avoids object / Series creation overhead
766767
indexer = get_group_index_sorter(group_index, ngroups)
767768
obj = obj.take(indexer)
768-
group_index = group_index.take(indexer)
769+
group_index = algorithms.take_nd(group_index, indexer, allow_fill=False)
769770
grouper = libreduction.SeriesGrouper(obj, func, group_index, ngroups)
770771
result, counts = grouper.get_result()
771772
return result, counts
@@ -996,7 +997,7 @@ def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0):
996997
@cache_readonly
997998
def slabels(self) -> np.ndarray: # np.ndarray[np.intp]
998999
# Sorted labels
999-
return self.labels.take(self._sort_idx)
1000+
return algorithms.take_nd(self.labels, self._sort_idx, allow_fill=False)
10001001

10011002
@cache_readonly
10021003
def _sort_idx(self) -> np.ndarray: # np.ndarray[np.intp]

pandas/core/indexes/base.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -2968,7 +2968,7 @@ def _union(self, other: Index, sort):
29682968
missing = algos.unique1d(self.get_indexer_non_unique(other)[1])
29692969

29702970
if len(missing) > 0:
2971-
other_diff = rvals.take(missing)
2971+
other_diff = algos.take_nd(rvals, missing, allow_fill=False)
29722972
result = concat_compat((lvals, other_diff))
29732973
else:
29742974
# error: Incompatible types in assignment (expression has type
@@ -4240,7 +4240,9 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray:
42404240
)
42414241

42424242
if right_lev_indexer is not None:
4243-
right_indexer = right_lev_indexer.take(join_index.codes[level])
4243+
right_indexer = algos.take_nd(
4244+
right_lev_indexer, join_index.codes[level], allow_fill=False
4245+
)
42444246
else:
42454247
right_indexer = join_index.codes[level]
42464248

pandas/core/indexes/multi.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -3531,10 +3531,14 @@ def equals(self, other: object) -> bool:
35313531
if not np.array_equal(self_mask, other_mask):
35323532
return False
35333533
self_codes = self_codes[~self_mask]
3534-
self_values = self.levels[i]._values.take(self_codes)
3534+
self_values = algos.take_nd(
3535+
np.asarray(self.levels[i]._values), self_codes, allow_fill=False
3536+
)
35353537

35363538
other_codes = other_codes[~other_mask]
3537-
other_values = other.levels[i]._values.take(other_codes)
3539+
other_values = algos.take_nd(
3540+
np.asarray(other.levels[i]._values), other_codes, allow_fill=False
3541+
)
35383542

35393543
# since we use NaT both datetime64 and timedelta64 we can have a
35403544
# situation where a level is typed say timedelta64 in self (IOW it

pandas/core/internals/managers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ def items(self) -> Index:
303303

304304
def get_dtypes(self):
305305
dtypes = np.array([blk.dtype for blk in self.blocks])
306-
return dtypes.take(self.blknos)
306+
return algos.take_nd(dtypes, self.blknos, allow_fill=False)
307307

308308
@property
309309
def arrays(self) -> list[ArrayLike]:

pandas/core/sorting.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from pandas.core.dtypes.generic import ABCMultiIndex
2929
from pandas.core.dtypes.missing import isna
3030

31+
from pandas.core import algorithms
3132
from pandas.core.construction import extract_array
3233

3334
if TYPE_CHECKING:
@@ -656,10 +657,10 @@ def _reorder_by_uniques(uniques, labels):
656657
mask = labels < 0
657658

658659
# move labels to right locations (ie, unsort ascending labels)
659-
labels = reverse_indexer.take(labels)
660+
labels = algorithms.take_nd(reverse_indexer, labels, allow_fill=False)
660661
np.putmask(labels, mask, -1)
661662

662663
# sort observed ids
663-
uniques = uniques.take(sorter)
664+
uniques = algorithms.take_nd(uniques, sorter, allow_fill=False)
664665

665666
return uniques, labels

0 commit comments

Comments
 (0)