From 989a790d8ac552eeca1eeb3c58aac83ba2d44b5e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 28 Oct 2020 10:20:31 -0700 Subject: [PATCH 1/7] CI: 32 bit maybe_indices_to_slice --- pandas/_libs/lib.pyx | 3 ++- pandas/core/indexes/base.py | 4 +++- pandas/core/indexes/datetimelike.py | 7 ++++--- pandas/core/indexes/multi.py | 2 +- pandas/tests/libs/test_lib.py | 22 +++++++++++----------- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 2cb4df7e054fe..e493e5e9d41d3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -36,6 +36,7 @@ from numpy cimport ( float32_t, float64_t, int64_t, + intp_t, ndarray, uint8_t, uint64_t, @@ -490,7 +491,7 @@ def has_infs_f8(const float64_t[:] arr) -> bool: return False -def maybe_indices_to_slice(ndarray[int64_t] indices, int max_len): +def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) int k, vstart, vlast, v diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f23363f3a3efa..50887ef63bcab 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5191,7 +5191,9 @@ def get_slice_bound(self, label, side: str_t, kind) -> int: if is_bool_dtype(slc): slc = lib.maybe_booleans_to_slice(slc.view("u1")) else: - slc = lib.maybe_indices_to_slice(slc.astype("i8"), len(self)) + slc = lib.maybe_indices_to_slice( + slc.astype(np.intp, copy=False), len(self) + ) if isinstance(slc, np.ndarray): raise KeyError( f"Cannot get {side} slice bound for non-unique " diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 863880e222b5d..c3f1f084d76fa 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -14,7 +14,6 @@ from pandas.util._decorators import Appender, cache_readonly, doc from pandas.core.dtypes.common import ( - ensure_int64, is_bool_dtype, is_categorical_dtype, is_dtype_equal, @@ -187,7 +186,7 @@ def __contains__(self, key: Any) -> bool: @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = ensure_int64(indices) + indices = np.asarray(indices, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) @@ -586,7 +585,9 @@ def delete(self, loc): freq = self.freq else: if is_list_like(loc): - loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), len(self)) + loc = lib.maybe_indices_to_slice( + np.asarray(loc, dtype=np.intp), len(self) + ) if isinstance(loc, slice) and loc.step in (1, None): if loc.start in (0, None) or loc.stop in (len(self), None): freq = self.freq diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index cbc0617ae96d3..b4934a35b7748 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2769,7 +2769,7 @@ def get_loc(self, key, method=None): def _maybe_to_slice(loc): """convert integer indexer to boolean mask or slice if possible""" - if not isinstance(loc, np.ndarray) or loc.dtype != "int64": + if not isinstance(loc, np.ndarray) or loc.dtype != np.intp: return loc loc = lib.maybe_indices_to_slice(loc, len(self)) diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py index c9c34916be32b..da3e18c8d9634 100644 --- a/pandas/tests/libs/test_lib.py +++ b/pandas/tests/libs/test_lib.py @@ -50,7 +50,7 @@ def test_maybe_indices_to_slice_left_edge(self): target = np.arange(100) # slice - indices = np.array([], dtype=np.int64) + indices = np.array([], dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert isinstance(maybe_slice, slice) @@ -58,7 +58,7 @@ def test_maybe_indices_to_slice_left_edge(self): for end in [1, 2, 5, 20, 99]: for step in [1, 2, 4]: - indices = np.arange(0, end, step, dtype=np.int64) + indices = np.arange(0, end, step, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert isinstance(maybe_slice, slice) @@ -73,7 +73,7 @@ def test_maybe_indices_to_slice_left_edge(self): # not slice for case in [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]]: - indices = np.array(case, dtype=np.int64) + indices = np.array(case, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert not isinstance(maybe_slice, slice) @@ -86,7 +86,7 @@ def test_maybe_indices_to_slice_right_edge(self): # slice for start in [0, 2, 5, 20, 97, 98]: for step in [1, 2, 4]: - indices = np.arange(start, 99, step, dtype=np.int64) + indices = np.arange(start, 99, step, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert isinstance(maybe_slice, slice) @@ -100,7 +100,7 @@ def test_maybe_indices_to_slice_right_edge(self): tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) # not slice - indices = np.array([97, 98, 99, 100], dtype=np.int64) + indices = np.array([97, 98, 99, 100], dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert not isinstance(maybe_slice, slice) @@ -113,7 +113,7 @@ def test_maybe_indices_to_slice_right_edge(self): with pytest.raises(IndexError, match=msg): target[maybe_slice] - indices = np.array([100, 99, 98, 97], dtype=np.int64) + indices = np.array([100, 99, 98, 97], dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert not isinstance(maybe_slice, slice) @@ -125,7 +125,7 @@ def test_maybe_indices_to_slice_right_edge(self): target[maybe_slice] for case in [[99, 97, 99, 96], [99, 99, 98, 97], [98, 98, 97, 96]]: - indices = np.array(case, dtype=np.int64) + indices = np.array(case, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert not isinstance(maybe_slice, slice) @@ -137,7 +137,7 @@ def test_maybe_indices_to_slice_both_edges(self): # slice for step in [1, 2, 4, 5, 8, 9]: - indices = np.arange(0, 9, step, dtype=np.int64) + indices = np.arange(0, 9, step, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert isinstance(maybe_slice, slice) tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) @@ -150,7 +150,7 @@ def test_maybe_indices_to_slice_both_edges(self): # not slice for case in [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]]: - indices = np.array(case, dtype=np.int64) + indices = np.array(case, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert not isinstance(maybe_slice, slice) tm.assert_numpy_array_equal(maybe_slice, indices) @@ -162,7 +162,7 @@ def test_maybe_indices_to_slice_middle(self): # slice for start, end in [(2, 10), (5, 25), (65, 97)]: for step in [1, 2, 4, 20]: - indices = np.arange(start, end, step, dtype=np.int64) + indices = np.arange(start, end, step, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert isinstance(maybe_slice, slice) @@ -177,7 +177,7 @@ def test_maybe_indices_to_slice_middle(self): # not slice for case in [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]]: - indices = np.array(case, dtype=np.int64) + indices = np.array(case, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) assert not isinstance(maybe_slice, slice) From 82200ee319bc846795c835b271fc4f42ebbc437e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 28 Oct 2020 11:05:09 -0700 Subject: [PATCH 2/7] accept either --- pandas/_libs/lib.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e493e5e9d41d3..5165b5bb2890d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -36,7 +36,6 @@ from numpy cimport ( float32_t, float64_t, int64_t, - intp_t, ndarray, uint8_t, uint64_t, @@ -491,7 +490,7 @@ def has_infs_f8(const float64_t[:] arr) -> bool: return False -def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len): +def maybe_indices_to_slice(ndarray[cython.integral] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) int k, vstart, vlast, v From 525818f89982bb852cd76b38188d6c4e4c8f2870 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 28 Oct 2020 11:45:16 -0700 Subject: [PATCH 3/7] revert --- pandas/core/indexes/base.py | 4 +--- pandas/core/indexes/datetimelike.py | 7 +++---- pandas/core/indexes/multi.py | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 50887ef63bcab..f23363f3a3efa 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5191,9 +5191,7 @@ def get_slice_bound(self, label, side: str_t, kind) -> int: if is_bool_dtype(slc): slc = lib.maybe_booleans_to_slice(slc.view("u1")) else: - slc = lib.maybe_indices_to_slice( - slc.astype(np.intp, copy=False), len(self) - ) + slc = lib.maybe_indices_to_slice(slc.astype("i8"), len(self)) if isinstance(slc, np.ndarray): raise KeyError( f"Cannot get {side} slice bound for non-unique " diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c3f1f084d76fa..863880e222b5d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -14,6 +14,7 @@ from pandas.util._decorators import Appender, cache_readonly, doc from pandas.core.dtypes.common import ( + ensure_int64, is_bool_dtype, is_categorical_dtype, is_dtype_equal, @@ -186,7 +187,7 @@ def __contains__(self, key: Any) -> bool: @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = np.asarray(indices, dtype=np.intp) + indices = ensure_int64(indices) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) @@ -585,9 +586,7 @@ def delete(self, loc): freq = self.freq else: if is_list_like(loc): - loc = lib.maybe_indices_to_slice( - np.asarray(loc, dtype=np.intp), len(self) - ) + loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), len(self)) if isinstance(loc, slice) and loc.step in (1, None): if loc.start in (0, None) or loc.stop in (len(self), None): freq = self.freq diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b4934a35b7748..cbc0617ae96d3 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2769,7 +2769,7 @@ def get_loc(self, key, method=None): def _maybe_to_slice(loc): """convert integer indexer to boolean mask or slice if possible""" - if not isinstance(loc, np.ndarray) or loc.dtype != np.intp: + if not isinstance(loc, np.ndarray) or loc.dtype != "int64": return loc loc = lib.maybe_indices_to_slice(loc, len(self)) From 8425c5fbefe9a3b65bfe22dafd555eb3a6ff755c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 28 Oct 2020 12:49:55 -0700 Subject: [PATCH 4/7] troubleshoot --- pandas/_libs/lib.pyx | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5165b5bb2890d..e56efef54d50c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -35,6 +35,7 @@ from numpy cimport ( flatiter, float32_t, float64_t, + int32_t, int64_t, ndarray, uint8_t, @@ -490,7 +491,12 @@ def has_infs_f8(const float64_t[:] arr) -> bool: return False -def maybe_indices_to_slice(ndarray[cython.integral] indices, int max_len): +ctypedef fused signed_int_t: + int32_t + int64_t + + +def maybe_indices_to_slice(ndarray[signed_int_t] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) int k, vstart, vlast, v From f6fd8cbf716e3cbcbe8464d9206d7f0fb75a39b2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 28 Oct 2020 14:52:43 -0700 Subject: [PATCH 5/7] try again --- pandas/_libs/lib.pyx | 9 ++------- pandas/core/frame.py | 4 +++- pandas/core/indexes/base.py | 4 +++- pandas/core/indexes/datetimelike.py | 7 ++++--- pandas/core/indexes/multi.py | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e56efef54d50c..e493e5e9d41d3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -35,8 +35,8 @@ from numpy cimport ( flatiter, float32_t, float64_t, - int32_t, int64_t, + intp_t, ndarray, uint8_t, uint64_t, @@ -491,12 +491,7 @@ def has_infs_f8(const float64_t[:] arr) -> bool: return False -ctypedef fused signed_int_t: - int32_t - int64_t - - -def maybe_indices_to_slice(ndarray[signed_int_t] indices, int max_len): +def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) int k, vstart, vlast, v diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 12cbee64c5e24..8f32b859fed2d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2921,7 +2921,9 @@ def __getitem__(self, key): indexer = convert_to_index_sliceable(self, key) if indexer is not None: if isinstance(indexer, np.ndarray): - indexer = lib.maybe_indices_to_slice(indexer, len(self)) + indexer = lib.maybe_indices_to_slice( + indexer.astype(np.intp, copy=False), len(self) + ) # either we have a slice or we have a string that can be converted # to a slice for partial-string date indexing return self._slice(indexer, axis=0) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f23363f3a3efa..50887ef63bcab 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5191,7 +5191,9 @@ def get_slice_bound(self, label, side: str_t, kind) -> int: if is_bool_dtype(slc): slc = lib.maybe_booleans_to_slice(slc.view("u1")) else: - slc = lib.maybe_indices_to_slice(slc.astype("i8"), len(self)) + slc = lib.maybe_indices_to_slice( + slc.astype(np.intp, copy=False), len(self) + ) if isinstance(slc, np.ndarray): raise KeyError( f"Cannot get {side} slice bound for non-unique " diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 863880e222b5d..c3f1f084d76fa 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -14,7 +14,6 @@ from pandas.util._decorators import Appender, cache_readonly, doc from pandas.core.dtypes.common import ( - ensure_int64, is_bool_dtype, is_categorical_dtype, is_dtype_equal, @@ -187,7 +186,7 @@ def __contains__(self, key: Any) -> bool: @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = ensure_int64(indices) + indices = np.asarray(indices, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) @@ -586,7 +585,9 @@ def delete(self, loc): freq = self.freq else: if is_list_like(loc): - loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), len(self)) + loc = lib.maybe_indices_to_slice( + np.asarray(loc, dtype=np.intp), len(self) + ) if isinstance(loc, slice) and loc.step in (1, None): if loc.start in (0, None) or loc.stop in (len(self), None): freq = self.freq diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index cbc0617ae96d3..b4934a35b7748 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2769,7 +2769,7 @@ def get_loc(self, key, method=None): def _maybe_to_slice(loc): """convert integer indexer to boolean mask or slice if possible""" - if not isinstance(loc, np.ndarray) or loc.dtype != "int64": + if not isinstance(loc, np.ndarray) or loc.dtype != np.intp: return loc loc = lib.maybe_indices_to_slice(loc, len(self)) From ab11f2b867df5611fc8ff758a331b4f0263890aa Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 28 Oct 2020 15:24:21 -0700 Subject: [PATCH 6/7] intp --- pandas/core/internals/managers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8b67788b1a1a1..49ca8f9ad55e9 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -233,8 +233,8 @@ def _rebuild_blknos_and_blklocs(self) -> None: """ Update mgr._blknos / mgr._blklocs. """ - new_blknos = np.empty(self.shape[0], dtype=np.int64) - new_blklocs = np.empty(self.shape[0], dtype=np.int64) + new_blknos = np.empty(self.shape[0], dtype=np.intp) + new_blklocs = np.empty(self.shape[0], dtype=np.intp) new_blknos.fill(-1) new_blklocs.fill(-1) From e4aef4ea667e036bbd6ab5f29e15d2965f79d8ab Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 28 Oct 2020 17:57:06 -0700 Subject: [PATCH 7/7] multiindex np.intp --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b4934a35b7748..bdd3afe747d1d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2816,7 +2816,7 @@ def _maybe_to_slice(loc): stacklevel=10, ) - loc = np.arange(start, stop, dtype="int64") + loc = np.arange(start, stop, dtype=np.intp) for i, k in enumerate(follow_key, len(lead_key)): mask = self.codes[i][loc] == self._get_loc_single_level_index(