Skip to content

Commit 979abe6

Browse files
authored
Revert "CLN: intp_t instead of int64_t for indexers in libs funcs (#40475)"
This reverts commit 38640d1.
1 parent 38640d1 commit 979abe6

File tree

9 files changed

+28
-42
lines changed

9 files changed

+28
-42
lines changed

pandas/_libs/algos.pyx

+6 -9
Original file line number | Diff line number | Diff line change
@@ -199,23 +199,20 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
199199
200200
Returns
201201
-------
202-
ndarray[intp_t, ndim=1]
203-
Indexer
204-
ndarray[int64_t, ndim=1]
205-
Group Counts
202+
tuple
203+
1-d indexer ordered by groups, group counts.
206204
207205
Notes
208206
-----
209207
This is a reverse of the label factorization process.
210208
"""
211209
cdef:
212210
Py_ssize_t i, loc, label, n
213-
ndarray[int64_t] counts, where
214-
ndarray[intp_t] indexer
211+
ndarray[int64_t] counts, where, result
215212

216213
counts = np.zeros(ngroups + 1, dtype=np.int64)
217214
n = len(index)
218-
indexer = np.zeros(n, dtype=np.intp)
215+
result = np.zeros(n, dtype=np.int64)
219216
where = np.zeros(ngroups + 1, dtype=np.int64)
220217

221218
with nogil:
@@ -231,10 +228,10 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
231228
# this is our indexer
232229
for i in range(n):
233230
label = index[i] + 1
234-
indexer[where[label]] = i
231+
result[where[label]] = i
235232
where[label] += 1
236233

237-
return indexer, counts
234+
return result, counts
238235

239236

240237
@cython.boundscheck(False)

pandas/_libs/algos_take_helper.pxi.in

+3 -3
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ def take_1d_{{name}}_{{dest}}(const {{c_type_in}}[:] values,
6666
{{else}}
6767
def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values,
6868
{{endif}}
69-
const intp_t[:] indexer,
69+
const int64_t[:] indexer,
7070
{{c_type_out}}[:] out,
7171
fill_value=np.nan):
7272

@@ -102,7 +102,7 @@ def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
102102
{{else}}
103103
def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
104104
{{endif}}
105-
ndarray[intp_t] indexer,
105+
ndarray[int64_t] indexer,
106106
{{c_type_out}}[:, :] out,
107107
fill_value=np.nan):
108108
cdef:
@@ -156,7 +156,7 @@ def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
156156
{{else}}
157157
def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
158158
{{endif}}
159-
ndarray[intp_t] indexer,
159+
ndarray[int64_t] indexer,
160160
{{c_type_out}}[:, :] out,
161161
fill_value=np.nan):
162162

pandas/_libs/groupby.pyx

-2
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ from numpy cimport (
1919
int16_t,
2020
int32_t,
2121
int64_t,
22-
intp_t,
2322
ndarray,
2423
uint8_t,
2524
uint16_t,
@@ -142,7 +141,6 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
142141
Py_ssize_t i, j, N, K, ngroups, size
143142
ndarray[int64_t] _counts
144143
ndarray[float64_t, ndim=2] data
145-
ndarray[intp_t] indexer
146144
float64_t* ptr
147145

148146
assert min_count == -1, "'min_count' only used in add and prod"

pandas/_libs/join.pyx

+6 -12
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,7 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
3333
Py_ssize_t max_groups):
3434
cdef:
3535
Py_ssize_t i, j, k, count = 0
36-
ndarray[intp_t] left_sorter, right_sorter
37-
ndarray[int64_t] left_count, right_count
36+
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
3837
ndarray[int64_t] left_indexer, right_indexer
3938
int64_t lc, rc
4039
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
@@ -85,8 +84,8 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
8584
Py_ssize_t max_groups, bint sort=True):
8685
cdef:
8786
Py_ssize_t i, j, k, count = 0
88-
ndarray[int64_t] left_count, right_count
89-
ndarray[intp_t] rev, left_sorter, right_sorter
87+
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
88+
ndarray rev
9089
ndarray[int64_t] left_indexer, right_indexer
9190
int64_t lc, rc
9291
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
@@ -158,8 +157,7 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
158157
Py_ssize_t max_groups):
159158
cdef:
160159
Py_ssize_t i, j, k, count = 0
161-
ndarray[intp_t] left_sorter, right_sorter
162-
ndarray[int64_t] left_count, right_count
160+
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
163161
ndarray[int64_t] left_indexer, right_indexer
164162
int64_t lc, rc
165163
int64_t left_pos = 0, right_pos = 0
@@ -217,16 +215,12 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
217215
_get_result_indexer(right_sorter, right_indexer))
218216

219217

220-
cdef ndarray[int64_t] _get_result_indexer(
221-
ndarray[intp_t] sorter, ndarray[int64_t] indexer
222-
):
218+
cdef _get_result_indexer(ndarray[int64_t] sorter, ndarray[int64_t] indexer):
223219
if len(sorter) > 0:
224220
# cython-only equivalent to
225221
# `res = algos.take_nd(sorter, indexer, fill_value=-1)`
226222
res = np.empty(len(indexer), dtype=np.int64)
227-
take_1d_int64_int64(ensure_int64(sorter), ensure_platform_int(indexer), res, -1)
228-
# FIXME: sorter is intp_t, not int64_t, opposite for indexer;
229-
# will this break on 32bit builds?
223+
take_1d_int64_int64(sorter, indexer, res, -1)
230224
else:
231225
# length-0 case
232226
res = np.empty(len(indexer), dtype=np.int64)

pandas/core/array_algos/take.py

+4 -5
Original file line number | Diff line number | Diff line change
@@ -117,10 +117,10 @@ def _take_nd_ndarray(
117117
) -> np.ndarray:
118118

119119
if indexer is None:
120-
indexer = np.arange(arr.shape[axis], dtype=np.intp)
120+
indexer = np.arange(arr.shape[axis], dtype=np.int64)
121121
dtype, fill_value = arr.dtype, arr.dtype.type()
122122
else:
123-
indexer = ensure_platform_int(indexer)
123+
indexer = ensure_int64(indexer, copy=False)
124124
indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
125125
arr, indexer, out, fill_value, allow_fill
126126
)
@@ -317,7 +317,7 @@ def _get_take_nd_function(
317317
if func is None:
318318

319319
def func(arr, indexer, out, fill_value=np.nan):
320-
indexer = ensure_platform_int(indexer)
320+
indexer = ensure_int64(indexer)
321321
_take_nd_object(
322322
arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info
323323
)
@@ -468,7 +468,7 @@ def wrapper(
468468

469469
def _take_nd_object(
470470
arr: np.ndarray,
471-
indexer: np.ndarray, # np.ndarray[np.intp]
471+
indexer: np.ndarray,
472472
out: np.ndarray,
473473
axis: int,
474474
fill_value,
@@ -544,5 +544,4 @@ def _take_preprocess_indexer_and_fill_value(
544544
# to crash when trying to cast it to dtype)
545545
dtype, fill_value = arr.dtype, arr.dtype.type()
546546

547-
indexer = ensure_platform_int(indexer)
548547
return indexer, dtype, fill_value, mask_info

pandas/core/arrays/categorical.py

+2 -3
Original file line number | Diff line number | Diff line change
@@ -1961,8 +1961,7 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]:
19611961
19621962
Returns
19631963
-------
1964-
Dict[Hashable, np.ndarray[np.intp]]
1965-
dict of categories -> indexers
1964+
dict of categories -> indexers
19661965
19671966
Examples
19681967
--------
@@ -1980,7 +1979,7 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]:
19801979
"""
19811980
categories = self.categories
19821981
r, counts = libalgos.groupsort_indexer(
1983-
self.codes.astype("int64", copy=False), categories.size
1982+
self.codes.astype("int64"), categories.size
19841983
)
19851984
counts = counts.cumsum()
19861985
_result = (r[start:end] for start, end in zip(counts, counts[1:]))

pandas/core/sorting.py

-1
Original file line number | Diff line number | Diff line change
@@ -606,7 +606,6 @@ def get_group_index_sorter(
606606
)
607607
if do_groupsort:
608608
sorter, _ = algos.groupsort_indexer(ensure_int64(group_index), ngroups)
609-
# sorter _should_ already be intp, but mypy is not yet able to verify
610609
else:
611610
sorter = group_index.argsort(kind="mergesort")
612611
return ensure_platform_int(sorter)

pandas/tests/groupby/test_categorical.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -1717,9 +1717,9 @@ def test_groupby_categorical_indices_unused_categories():
17171717
grouped = df.groupby("key", sort=False)
17181718
result = grouped.indices
17191719
expected = {
1720-
"b": np.array([0, 1], dtype="intp"),
1721-
"a": np.array([2], dtype="intp"),
1722-
"c": np.array([], dtype="intp"),
1720+
"b": np.array([0, 1], dtype="int64"),
1721+
"a": np.array([2], dtype="int64"),
1722+
"c": np.array([], dtype="int64"),
17231723
}
17241724
assert result.keys() == expected.keys()
17251725
for key in result.keys():

pandas/tests/test_algos.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -2123,19 +2123,19 @@ def test_groupsort_indexer():
21232123

21242124
# need to use a stable sort
21252125
# np.argsort returns int, groupsort_indexer
2126-
# always returns intp
2126+
# always returns int64
21272127
expected = np.argsort(a, kind="mergesort")
2128-
expected = expected.astype(np.intp)
2128+
expected = expected.astype(np.int64)
21292129

21302130
tm.assert_numpy_array_equal(result, expected)
21312131

21322132
# compare with lexsort
21332133
# np.lexsort returns int, groupsort_indexer
2134-
# always returns intp
2134+
# always returns int64
21352135
key = a * 1000 + b
21362136
result = libalgos.groupsort_indexer(key, 1000000)[0]
21372137
expected = np.lexsort((b, a))
2138-
expected = expected.astype(np.intp)
2138+
expected = expected.astype(np.int64)
21392139

21402140
tm.assert_numpy_array_equal(result, expected)
21412141

0 commit comments

Comments
 (0)