Skip to content

Commit e40c14d

Browse files
authored
TYP: intp in libalgos (pandas-dev#40623)
1 parent acacff3 commit e40c14d

File tree

6 files changed

+36 -38 lines changed

pandas/_libs/algos.pyx

+7 -5
Original file line number | Diff line number | Diff line change
@@ -591,16 +591,17 @@ def validate_limit(nobs: int, limit=None) -> int:
591591

592592
@cython.boundscheck(False)
593593
@cython.wraparound(False)
594-
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
594+
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
595+
# -> ndarray[intp_t, ndim=1]
595596
cdef:
596597
Py_ssize_t i, j, nleft, nright
597-
ndarray[int64_t, ndim=1] indexer
598+
ndarray[intp_t, ndim=1] indexer
598599
algos_t cur, next_val
599600
int lim, fill_count = 0
600601

601602
nleft = len(old)
602603
nright = len(new)
603-
indexer = np.empty(nright, dtype=np.int64)
604+
indexer = np.empty(nright, dtype=np.intp)
604605
indexer[:] = -1
605606

606607
lim = validate_limit(nright, limit)
@@ -737,15 +738,16 @@ D
737738
@cython.boundscheck(False)
738739
@cython.wraparound(False)
739740
def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
741+
# -> ndarray[intp_t, ndim=1]
740742
cdef:
741743
Py_ssize_t i, j, nleft, nright
742-
ndarray[int64_t, ndim=1] indexer
744+
ndarray[intp_t, ndim=1] indexer
743745
algos_t cur, prev
744746
int lim, fill_count = 0
745747

746748
nleft = len(old)
747749
nright = len(new)
748-
indexer = np.empty(nright, dtype=np.int64)
750+
indexer = np.empty(nright, dtype=np.intp)
749751
indexer[:] = -1
750752

751753
lim = validate_limit(nright, limit)

pandas/_libs/algos_take_helper.pxi.in

+2 -2
Original file line number | Diff line number | Diff line change
@@ -219,8 +219,8 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
219219
fill_value=np.nan):
220220
cdef:
221221
Py_ssize_t i, j, k, n, idx
222-
ndarray[int64_t] idx0 = indexer[0]
223-
ndarray[int64_t] idx1 = indexer[1]
222+
ndarray[intp_t] idx0 = indexer[0]
223+
ndarray[intp_t] idx1 = indexer[1]
224224
{{c_type_out}} fv
225225

226226
n = len(idx0)

pandas/_libs/index.pyx

+6 -7
Original file line number | Diff line number | Diff line change
@@ -458,19 +458,19 @@ cdef class DatetimeEngine(Int64Engine):
458458
def get_indexer(self, ndarray values):
459459
self._ensure_mapping_populated()
460460
if values.dtype != self._get_box_dtype():
461-
return np.repeat(-1, len(values)).astype('i4')
461+
return np.repeat(-1, len(values)).astype(np.intp)
462462
values = np.asarray(values).view('i8')
463463
return self.mapping.lookup(values)
464464

465465
def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
466466
if other.dtype != self._get_box_dtype():
467-
return np.repeat(-1, len(other)).astype('i4')
467+
return np.repeat(-1, len(other)).astype(np.intp)
468468
other = np.asarray(other).view('i8')
469469
return algos.pad(self._get_index_values(), other, limit=limit)
470470

471471
def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
472472
if other.dtype != self._get_box_dtype():
473-
return np.repeat(-1, len(other)).astype('i4')
473+
return np.repeat(-1, len(other)).astype(np.intp)
474474
other = np.asarray(other).view('i8')
475475
return algos.backfill(self._get_index_values(), other, limit=limit)
476476

@@ -653,7 +653,7 @@ cdef class BaseMultiIndexCodesEngine:
653653
ndarray[int64_t, ndim=1] target_order
654654
ndarray[object, ndim=1] target_values
655655
ndarray[int64_t, ndim=1] new_codes, new_target_codes
656-
ndarray[int64_t, ndim=1] sorted_indexer
656+
ndarray[intp_t, ndim=1] sorted_indexer
657657

658658
target_order = np.argsort(target).astype('int64')
659659
target_values = target[target_order]
@@ -694,9 +694,8 @@ cdef class BaseMultiIndexCodesEngine:
694694
next_code += 1
695695

696696
# get the indexer, and undo the sorting of `target.values`
697-
sorted_indexer = (
698-
algos.backfill if method == "backfill" else algos.pad
699-
)(new_codes, new_target_codes, limit=limit).astype('int64')
697+
algo = algos.backfill if method == "backfill" else algos.pad
698+
sorted_indexer = algo(new_codes, new_target_codes, limit=limit)
700699
return sorted_indexer[np.argsort(target_order)]
701700

702701
def get_loc(self, object key):

pandas/core/array_algos/take.py

+12 -13
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,7 @@
1515
from pandas._typing import ArrayLike
1616

1717
from pandas.core.dtypes.cast import maybe_promote
18-
from pandas.core.dtypes.common import (
19-
ensure_int64,
20-
ensure_platform_int,
21-
)
18+
from pandas.core.dtypes.common import ensure_platform_int
2219
from pandas.core.dtypes.missing import na_value_for_dtype
2320

2421
from pandas.core.construction import ensure_wrapped_if_datetimelike
@@ -201,7 +198,7 @@ def take_1d(
201198

202199

203200
def take_2d_multi(
204-
arr: np.ndarray, indexer: np.ndarray, fill_value=np.nan
201+
arr: np.ndarray, indexer: tuple[np.ndarray, np.ndarray], fill_value=np.nan
205202
) -> np.ndarray:
206203
"""
207204
Specialized Cython take which sets NaN values in one pass.
@@ -214,11 +211,9 @@ def take_2d_multi(
214211

215212
row_idx, col_idx = indexer
216213

217-
row_idx = ensure_int64(row_idx)
218-
col_idx = ensure_int64(col_idx)
219-
# error: Incompatible types in assignment (expression has type "Tuple[Any, Any]",
220-
# variable has type "ndarray")
221-
indexer = row_idx, col_idx # type: ignore[assignment]
214+
row_idx = ensure_platform_int(row_idx)
215+
col_idx = ensure_platform_int(col_idx)
216+
indexer = row_idx, col_idx
222217
mask_info = None
223218

224219
# check for promotion based on types only (do this first because
@@ -474,19 +469,23 @@ def _take_nd_object(
474469
if arr.dtype != out.dtype:
475470
arr = arr.astype(out.dtype)
476471
if arr.shape[axis] > 0:
477-
arr.take(ensure_platform_int(indexer), axis=axis, out=out)
472+
arr.take(indexer, axis=axis, out=out)
478473
if needs_masking:
479474
outindexer = [slice(None)] * arr.ndim
480475
outindexer[axis] = mask
481476
out[tuple(outindexer)] = fill_value
482477

483478

484479
def _take_2d_multi_object(
485-
arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value, mask_info
480+
arr: np.ndarray,
481+
indexer: tuple[np.ndarray, np.ndarray],
482+
out: np.ndarray,
483+
fill_value,
484+
mask_info,
486485
) -> None:
487486
# this is not ideal, performance-wise, but it's better than raising
488487
# an exception (best to optimize in Cython to avoid getting here)
489-
row_idx, col_idx = indexer
488+
row_idx, col_idx = indexer # both np.intp
490489
if mask_info is not None:
491490
(row_mask, col_mask), (row_needs, col_needs) = mask_info
492491
else:

pandas/core/frame.py

+1 -3
Original file line number | Diff line number | Diff line change
@@ -4564,9 +4564,7 @@ def _reindex_multi(self, axes, copy: bool, fill_value) -> DataFrame:
45644564
indexer = row_indexer, col_indexer
45654565
# error: Argument 2 to "take_2d_multi" has incompatible type "Tuple[Any,
45664566
# Any]"; expected "ndarray"
4567-
new_values = take_2d_multi(
4568-
self.values, indexer, fill_value=fill_value # type: ignore[arg-type]
4569-
)
4567+
new_values = take_2d_multi(self.values, indexer, fill_value=fill_value)
45704568
return self._constructor(new_values, index=new_index, columns=new_columns)
45714569
else:
45724570
return self._reindex_with_indexers(

pandas/tests/test_algos.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -1784,19 +1784,19 @@ def test_pad_backfill_object_segfault():
17841784
new = np.array([datetime(2010, 12, 31)], dtype="O")
17851785

17861786
result = libalgos.pad["object"](old, new)
1787-
expected = np.array([-1], dtype=np.int64)
1787+
expected = np.array([-1], dtype=np.intp)
17881788
tm.assert_numpy_array_equal(result, expected)
17891789

17901790
result = libalgos.pad["object"](new, old)
1791-
expected = np.array([], dtype=np.int64)
1791+
expected = np.array([], dtype=np.intp)
17921792
tm.assert_numpy_array_equal(result, expected)
17931793

17941794
result = libalgos.backfill["object"](old, new)
1795-
expected = np.array([-1], dtype=np.int64)
1795+
expected = np.array([-1], dtype=np.intp)
17961796
tm.assert_numpy_array_equal(result, expected)
17971797

17981798
result = libalgos.backfill["object"](new, old)
1799-
expected = np.array([], dtype=np.int64)
1799+
expected = np.array([], dtype=np.intp)
18001800
tm.assert_numpy_array_equal(result, expected)
18011801

18021802

@@ -1822,15 +1822,15 @@ def test_backfill(self):
18221822

18231823
filler = libalgos.backfill["int64_t"](old.values, new.values)
18241824

1825-
expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.int64)
1825+
expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.intp)
18261826
tm.assert_numpy_array_equal(filler, expect_filler)
18271827

18281828
# corner case
18291829
old = Index([1, 4])
18301830
new = Index(list(range(5, 10)))
18311831
filler = libalgos.backfill["int64_t"](old.values, new.values)
18321832

1833-
expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64)
1833+
expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp)
18341834
tm.assert_numpy_array_equal(filler, expect_filler)
18351835

18361836
def test_pad(self):
@@ -1839,14 +1839,14 @@ def test_pad(self):
18391839

18401840
filler = libalgos.pad["int64_t"](old.values, new.values)
18411841

1842-
expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.int64)
1842+
expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.intp)
18431843
tm.assert_numpy_array_equal(filler, expect_filler)
18441844

18451845
# corner case
18461846
old = Index([5, 10])
18471847
new = Index(np.arange(5))
18481848
filler = libalgos.pad["int64_t"](old.values, new.values)
1849-
expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64)
1849+
expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp)
18501850
tm.assert_numpy_array_equal(filler, expect_filler)
18511851

18521852

0 commit comments

Comments
 (0)