diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 03f4ce273de6e..c2b9c723b7c72 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1030,9 +1030,9 @@ def rank_1d( if rank_t is object: nan_fill_val = Infinity() elif rank_t is int64_t: - nan_fill_val = np.iinfo(np.int64).max + nan_fill_val = util.INT64_MAX elif rank_t is uint64_t: - nan_fill_val = np.iinfo(np.uint64).max + nan_fill_val = util.UINT64_MAX else: nan_fill_val = np.inf order = (masked_vals, mask, labels) @@ -1393,7 +1393,7 @@ def rank_2d( # int64 and datetimelike else: - nan_value = np.iinfo(np.int64).max + nan_value = util.INT64_MAX else: if rank_t is object: diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 3f4623638c70e..077d2e60cc3a4 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -25,6 +25,9 @@ class NoDefault(Enum): ... no_default: NoDefault +i8max: int +u8max: int + def item_from_zerodim(val: object) -> object: ... def infer_dtype(value: object, skipna: bool = True) -> str: ... def is_iterator(obj: object) -> bool: ... diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 0aec7e5e5a363..37e83ddb0ffed 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -118,6 +118,10 @@ cdef: float64_t NaN = np.NaN +# python-visible +i8max = INT64_MAX +u8max = UINT64_MAX + @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f26cf113f7d5e..7dcc83f76db75 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1092,18 +1092,19 @@ def checked_add_with_arr( # it is negative, we then check whether its sum with the element in # 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow # error as well. + i8max = lib.i8max + i8min = iNaT + mask1 = b2 > 0 mask2 = b2 < 0 if not mask1.any(): - to_raise = ((np.iinfo(np.int64).min - b2 > arr) & not_nan).any() + to_raise = ((i8min - b2 > arr) & not_nan).any() elif not mask2.any(): - to_raise = ((np.iinfo(np.int64).max - b2 < arr) & not_nan).any() + to_raise = ((i8max - b2 < arr) & not_nan).any() else: - to_raise = ( - (np.iinfo(np.int64).max - b2[mask1] < arr[mask1]) & not_nan[mask1] - ).any() or ( - (np.iinfo(np.int64).min - b2[mask2] > arr[mask2]) & not_nan[mask2] + to_raise = ((i8max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any() or ( + (i8min - b2[mask2] > arr[mask2]) & not_nan[mask2] ).any() if to_raise: diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index cac9fcd40fa52..3909875e5660a 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -6,6 +6,7 @@ import numpy as np +from pandas._libs.lib import i8max from pandas._libs.tslibs import ( BaseOffset, OutOfBoundsDatetime, @@ -103,7 +104,7 @@ def _generate_range_overflow_safe( # GH#14187 raise instead of incorrectly wrapping around assert side in ["start", "end"] - i64max = np.uint64(np.iinfo(np.int64).max) + i64max = np.uint64(i8max) msg = f"Cannot generate range with {side}={endpoint} and periods={periods}" with np.errstate(over="raise"): @@ -180,7 +181,7 @@ def _generate_range_overflow_safe_signed( # error: Incompatible types in assignment (expression has type # "unsignedinteger[_64Bit]", variable has type "signedinteger[_64Bit]") result = np.uint64(endpoint) + np.uint64(addend) # type: ignore[assignment] - i64max = np.uint64(np.iinfo(np.int64).max) + i64max = np.uint64(i8max) assert result > i64max if result <= i64max + np.uint64(stride): # error: Incompatible return value type (got "unsignedinteger", expected diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index ecdf2624c8ec1..c34944985f2b6 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -205,7 +205,7 @@ def _get_fill_value( else: if fill_value_typ == "+inf": # need the max int here - return np.iinfo(np.int64).max + return lib.i8max else: return iNaT @@ -376,7 +376,7 @@ def _wrap_results(result, dtype: np.dtype, fill_value=None): result = np.nan # raise if we have a timedelta64[ns] which is too large - if np.fabs(result) > np.iinfo(np.int64).max: + if np.fabs(result) > lib.i8max: raise ValueError("overflow in timedelta operation") result = Timedelta(result, unit="ns") @@ -1758,7 +1758,7 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: if accum_func == np.minimum.accumulate: # Note: the accum_func comparison fails as an "is" comparison y = values.view("i8") - y[mask] = np.iinfo(np.int64).max + y[mask] = lib.i8max changed = True else: y = values diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 8531f93fba321..712e9785f47f7 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -40,8 +40,6 @@ from pandas import MultiIndex from pandas.core.indexes.base import Index -_INT64_MAX = np.iinfo(np.int64).max - def get_indexer_indexer( target: Index, @@ -133,7 +131,7 @@ def _int64_cut_off(shape) -> int: acc = 1 for i, mul in enumerate(shape): acc *= int(mul) - if not acc < _INT64_MAX: + if not acc < lib.i8max: return i return len(shape) @@ -153,7 +151,7 @@ def maybe_lift(lab, size) -> tuple[np.ndarray, int]: labels = list(labels) # Iteratively process all the labels in chunks sized so less - # than _INT64_MAX unique int ids will be required for each chunk + # than lib.i8max unique int ids will be required for each chunk while True: # how many levels can be done without overflow: nlev = _int64_cut_off(lshape) @@ -215,7 +213,7 @@ def is_int64_overflow_possible(shape) -> bool: for x in shape: the_prod *= int(x) - return the_prod >= _INT64_MAX + return the_prod >= lib.i8max def decons_group_index(comp_labels, shape): diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index fb5002648b6a5..962728b2f38c4 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -14,6 +14,7 @@ import numpy as np +from pandas._libs import lib from pandas._libs.hashing import hash_object_array from pandas._typing import ( ArrayLike, @@ -244,7 +245,7 @@ def _hash_categorical(cat: Categorical, encoding: str, hash_key: str) -> np.ndar result = np.zeros(len(mask), dtype="uint64") if mask.any(): - result[mask] = np.iinfo(np.uint64).max + result[mask] = lib.u8max return result diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 8b42bca8b8a0c..4aa2f62fe85a0 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -4,6 +4,7 @@ import numpy as np import pytest +from pandas._libs import lib from pandas._libs.tslibs import ( NaT, iNaT, @@ -391,8 +392,7 @@ def test_round_implementation_bounds(self): "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil] ) def test_round_sanity(self, method, n, request): - iinfo = np.iinfo(np.int64) - val = np.random.randint(iinfo.min + 1, iinfo.max, dtype=np.int64) + val = np.random.randint(iNaT + 1, lib.i8max, dtype=np.int64) td = Timedelta(val) assert method(td, "ns") == td @@ -552,8 +552,8 @@ def test_implementation_limits(self): # GH 12727 # timedelta limits correspond to int64 boundaries - assert min_td.value == np.iinfo(np.int64).min + 1 - assert max_td.value == np.iinfo(np.int64).max + assert min_td.value == iNaT + 1 + assert max_td.value == lib.i8max # Beyond lower limit, a NAT before the Overflow assert (min_td - Timedelta(1, "ns")) is NaT diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index aab0b2e6d31ef..366c0f7cf2f74 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -6,11 +6,13 @@ import pytz from pytz import utc +from pandas._libs import lib from pandas._libs.tslibs import ( NaT, Timedelta, Timestamp, conversion, + iNaT, to_offset, ) from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG @@ -279,8 +281,7 @@ def test_round_implementation_bounds(self): "method", [Timestamp.round, Timestamp.floor, Timestamp.ceil] ) def test_round_sanity(self, method, n): - iinfo = np.iinfo(np.int64) - val = np.random.randint(iinfo.min + 1, iinfo.max, dtype=np.int64) + val = np.random.randint(iNaT + 1, lib.i8max, dtype=np.int64) ts = Timestamp(val) def checker(res, ts, nanos):