From 6acd43e786148537a190374bdcc9618135f5c873 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Jun 2021 18:50:21 -0700 Subject: [PATCH 1/7] CI: troubleshoot py310 build --- .github/workflows/python-dev.yml | 2 +- pandas/core/algorithms.py | 14 ++++++++------ pandas/core/arrays/_ranges.py | 4 ++-- pandas/core/nanops.py | 6 +++--- pandas/core/sorting.py | 3 ++- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml index 38b1aa9ae7047..feffd50d392ec 100644 --- a/.github/workflows/python-dev.yml +++ b/.github/workflows/python-dev.yml @@ -46,7 +46,7 @@ jobs: - name: Test with pytest run: | - coverage run -m pytest -m 'not slow and not network and not clipboard' pandas + coverage run -m pytest -m 'not slow and not network and not clipboard' pandas --ignore=pandas/tests/scalar/timestamp continue-on-error: true - name: Publish test results diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f26cf113f7d5e..2856f5f79e092 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -20,6 +20,7 @@ import numpy as np from pandas._libs import ( + Timestamp, algos, hashtable as htable, iNaT, @@ -1092,18 +1093,19 @@ def checked_add_with_arr( # it is negative, we then check whether its sum with the element in # 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow # error as well. + i8max = Timestamp.max.value # GH#? 
+ i8min = iNaT + mask1 = b2 > 0 mask2 = b2 < 0 if not mask1.any(): - to_raise = ((np.iinfo(np.int64).min - b2 > arr) & not_nan).any() + to_raise = ((i8min - b2 > arr) & not_nan).any() elif not mask2.any(): - to_raise = ((np.iinfo(np.int64).max - b2 < arr) & not_nan).any() + to_raise = ((i8max - b2 < arr) & not_nan).any() else: - to_raise = ( - (np.iinfo(np.int64).max - b2[mask1] < arr[mask1]) & not_nan[mask1] - ).any() or ( - (np.iinfo(np.int64).min - b2[mask2] > arr[mask2]) & not_nan[mask2] + to_raise = ((i8max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any() or ( + (i8min - b2[mask2] > arr[mask2]) & not_nan[mask2] ).any() if to_raise: diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index cac9fcd40fa52..9cb01ca1ffd19 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -103,7 +103,7 @@ def _generate_range_overflow_safe( # GH#14187 raise instead of incorrectly wrapping around assert side in ["start", "end"] - i64max = np.uint64(np.iinfo(np.int64).max) + i64max = np.uint64(Timestamp.max.value) # GH#??? 
msg = f"Cannot generate range with {side}={endpoint} and periods={periods}" with np.errstate(over="raise"): @@ -180,7 +180,7 @@ def _generate_range_overflow_safe_signed( # error: Incompatible types in assignment (expression has type # "unsignedinteger[_64Bit]", variable has type "signedinteger[_64Bit]") result = np.uint64(endpoint) + np.uint64(addend) # type: ignore[assignment] - i64max = np.uint64(np.iinfo(np.int64).max) + i64max = np.uint64(Timestamp.max.value) assert result > i64max if result <= i64max + np.uint64(stride): # error: Incompatible return value type (got "unsignedinteger", expected diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index ecdf2624c8ec1..5a280bc48e88f 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -205,7 +205,7 @@ def _get_fill_value( else: if fill_value_typ == "+inf": # need the max int here - return np.iinfo(np.int64).max + return Timedelta.max.value # GH#??? else: return iNaT @@ -376,7 +376,7 @@ def _wrap_results(result, dtype: np.dtype, fill_value=None): result = np.nan # raise if we have a timedelta64[ns] which is too large - if np.fabs(result) > np.iinfo(np.int64).max: + if np.fabs(result) > Timedelta.max.value: raise ValueError("overflow in timedelta operation") result = Timedelta(result, unit="ns") @@ -1758,7 +1758,7 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: if accum_func == np.minimum.accumulate: # Note: the accum_func comparison fails as an "is" comparison y = values.view("i8") - y[mask] = np.iinfo(np.int64).max + y[mask] = Timedelta.max.value changed = True else: y = values diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 8531f93fba321..24db06fe2db8b 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -13,6 +13,7 @@ import numpy as np from pandas._libs import ( + Timestamp, algos, hashtable, lib, @@ -40,7 +41,7 @@ from pandas import MultiIndex from pandas.core.indexes.base import Index -_INT64_MAX = np.iinfo(np.int64).max 
+_INT64_MAX = Timestamp.max.value # GH#? def get_indexer_indexer( From b8debbaeaea2f178976000466721c7811af3d459 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Jun 2021 18:52:18 -0700 Subject: [PATCH 2/7] revert python-dev.yml --- .github/workflows/python-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml index feffd50d392ec..38b1aa9ae7047 100644 --- a/.github/workflows/python-dev.yml +++ b/.github/workflows/python-dev.yml @@ -46,7 +46,7 @@ jobs: - name: Test with pytest run: | - coverage run -m pytest -m 'not slow and not network and not clipboard' pandas --ignore=pandas/tests/scalar/timestamp + coverage run -m pytest -m 'not slow and not network and not clipboard' pandas continue-on-error: true - name: Publish test results From 03e549ebca6ac1971da3f6477e2757915dbc2058 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Jun 2021 19:58:41 -0700 Subject: [PATCH 3/7] Troubleshoot py310 build --- pandas/_libs/algos.pyx | 6 +++--- pandas/_libs/lib.pyx | 4 ++++ pandas/core/util/hashing.py | 3 ++- pandas/tests/scalar/timedelta/test_timedelta.py | 8 ++++---- pandas/tests/scalar/timestamp/test_unary_ops.py | 5 +++-- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 03f4ce273de6e..c2b9c723b7c72 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1030,9 +1030,9 @@ def rank_1d( if rank_t is object: nan_fill_val = Infinity() elif rank_t is int64_t: - nan_fill_val = np.iinfo(np.int64).max + nan_fill_val = util.INT64_MAX elif rank_t is uint64_t: - nan_fill_val = np.iinfo(np.uint64).max + nan_fill_val = util.UINT64_MAX else: nan_fill_val = np.inf order = (masked_vals, mask, labels) @@ -1393,7 +1393,7 @@ def rank_2d( # int64 and datetimelike else: - nan_value = np.iinfo(np.int64).max + nan_value = util.INT64_MAX else: if rank_t is object: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 
0aec7e5e5a363..86205dc9126d7 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -118,6 +118,10 @@ cdef: float64_t NaN = np.NaN +# python-visible +i8max = INT64_MAX +u8max = UINT64_MAX + @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index fb5002648b6a5..962728b2f38c4 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -14,6 +14,7 @@ import numpy as np +from pandas._libs import lib from pandas._libs.hashing import hash_object_array from pandas._typing import ( ArrayLike, @@ -244,7 +245,7 @@ def _hash_categorical(cat: Categorical, encoding: str, hash_key: str) -> np.ndar result = np.zeros(len(mask), dtype="uint64") if mask.any(): - result[mask] = np.iinfo(np.uint64).max + result[mask] = lib.u8max return result diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 8b42bca8b8a0c..4aa2f62fe85a0 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -4,6 +4,7 @@ import numpy as np import pytest +from pandas._libs import lib from pandas._libs.tslibs import ( NaT, iNaT, @@ -391,8 +392,7 @@ def test_round_implementation_bounds(self): "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil] ) def test_round_sanity(self, method, n, request): - iinfo = np.iinfo(np.int64) - val = np.random.randint(iinfo.min + 1, iinfo.max, dtype=np.int64) + val = np.random.randint(iNaT + 1, lib.i8max, dtype=np.int64) td = Timedelta(val) assert method(td, "ns") == td @@ -552,8 +552,8 @@ def test_implementation_limits(self): # GH 12727 # timedelta limits correspond to int64 boundaries - assert min_td.value == np.iinfo(np.int64).min + 1 - assert max_td.value == np.iinfo(np.int64).max + assert min_td.value == iNaT + 1 + assert max_td.value == lib.i8max # Beyond lower limit, a NAT before the Overflow assert (min_td - Timedelta(1, "ns")) is NaT diff --git 
a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index aab0b2e6d31ef..366c0f7cf2f74 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -6,11 +6,13 @@ import pytz from pytz import utc +from pandas._libs import lib from pandas._libs.tslibs import ( NaT, Timedelta, Timestamp, conversion, + iNaT, to_offset, ) from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG @@ -279,8 +281,7 @@ def test_round_implementation_bounds(self): "method", [Timestamp.round, Timestamp.floor, Timestamp.ceil] ) def test_round_sanity(self, method, n): - iinfo = np.iinfo(np.int64) - val = np.random.randint(iinfo.min + 1, iinfo.max, dtype=np.int64) + val = np.random.randint(iNaT + 1, lib.i8max, dtype=np.int64) ts = Timestamp(val) def checker(res, ts, nanos): From 5a822b72d089c50a7469a106bc8843be2bb801ee Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Jun 2021 20:01:21 -0700 Subject: [PATCH 4/7] standardize usage --- pandas/core/algorithms.py | 3 +-- pandas/core/arrays/_ranges.py | 5 +++-- pandas/core/nanops.py | 6 +++--- pandas/core/sorting.py | 3 +-- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 2856f5f79e092..7dcc83f76db75 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -20,7 +20,6 @@ import numpy as np from pandas._libs import ( - Timestamp, algos, hashtable as htable, iNaT, @@ -1093,7 +1092,7 @@ def checked_add_with_arr( # it is negative, we then check whether its sum with the element in # 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow # error as well. - i8max = Timestamp.max.value # GH#? 
+ i8max = lib.i8max i8min = iNaT mask1 = b2 > 0 diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 9cb01ca1ffd19..3909875e5660a 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -6,6 +6,7 @@ import numpy as np +from pandas._libs.lib import i8max from pandas._libs.tslibs import ( BaseOffset, OutOfBoundsDatetime, @@ -103,7 +104,7 @@ def _generate_range_overflow_safe( # GH#14187 raise instead of incorrectly wrapping around assert side in ["start", "end"] - i64max = np.uint64(Timestamp.max.value) # GH#??? + i64max = np.uint64(i8max) msg = f"Cannot generate range with {side}={endpoint} and periods={periods}" with np.errstate(over="raise"): @@ -180,7 +181,7 @@ def _generate_range_overflow_safe_signed( # error: Incompatible types in assignment (expression has type # "unsignedinteger[_64Bit]", variable has type "signedinteger[_64Bit]") result = np.uint64(endpoint) + np.uint64(addend) # type: ignore[assignment] - i64max = np.uint64(Timestamp.max.value) + i64max = np.uint64(i8max) assert result > i64max if result <= i64max + np.uint64(stride): # error: Incompatible return value type (got "unsignedinteger", expected diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 5a280bc48e88f..c34944985f2b6 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -205,7 +205,7 @@ def _get_fill_value( else: if fill_value_typ == "+inf": # need the max int here - return Timedelta.max.value # GH#??? 
+ return lib.i8max else: return iNaT @@ -376,7 +376,7 @@ def _wrap_results(result, dtype: np.dtype, fill_value=None): result = np.nan # raise if we have a timedelta64[ns] which is too large - if np.fabs(result) > Timedelta.max.value: + if np.fabs(result) > lib.i8max: raise ValueError("overflow in timedelta operation") result = Timedelta(result, unit="ns") @@ -1758,7 +1758,7 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: if accum_func == np.minimum.accumulate: # Note: the accum_func comparison fails as an "is" comparison y = values.view("i8") - y[mask] = Timedelta.max.value + y[mask] = lib.i8max changed = True else: y = values diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 24db06fe2db8b..9c217ae16b769 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -13,7 +13,6 @@ import numpy as np from pandas._libs import ( - Timestamp, algos, hashtable, lib, @@ -41,7 +40,7 @@ from pandas import MultiIndex from pandas.core.indexes.base import Index -_INT64_MAX = Timestamp.max.value # GH#? 
+_INT64_MAX = lib.i8max def get_indexer_indexer( From 6e22d6bf38c74b264da75a55c1bbfecf5211260e Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Jun 2021 20:40:01 -0700 Subject: [PATCH 5/7] fix i8max, u8max --- pandas/_libs/lib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 86205dc9126d7..37e83ddb0ffed 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -119,8 +119,8 @@ cdef: float64_t NaN = np.NaN # python-visible -i8max = INT64_MAX -u8max = UINT64_MAX +i8max = <int64_t>INT64_MAX +u8max = <uint64_t>UINT64_MAX @cython.wraparound(False) From d8888360037844b1b8333edb0d38db248cfaf9ca Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 14 Jun 2021 07:34:41 -0700 Subject: [PATCH 6/7] mypy fixup --- pandas/_libs/lib.pyi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 3f4623638c70e..077d2e60cc3a4 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -25,6 +25,9 @@ class NoDefault(Enum): ... no_default: NoDefault +i8max: int +u8max: int + def item_from_zerodim(val: object) -> object: ... def infer_dtype(value: object, skipna: bool = True) -> str: ... def is_iterator(obj: object) -> bool: ... 
From 81da4039374aff4a87804978ce5d89b130581271 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 14 Jun 2021 19:04:31 -0700 Subject: [PATCH 7/7] remove _INT64_MAX --- pandas/core/sorting.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 9c217ae16b769..712e9785f47f7 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -40,8 +40,6 @@ from pandas import MultiIndex from pandas.core.indexes.base import Index -_INT64_MAX = lib.i8max - def get_indexer_indexer( target: Index, @@ -133,7 +131,7 @@ def _int64_cut_off(shape) -> int: acc = 1 for i, mul in enumerate(shape): acc *= int(mul) - if not acc < _INT64_MAX: + if not acc < lib.i8max: return i return len(shape) @@ -153,7 +151,7 @@ def maybe_lift(lab, size) -> tuple[np.ndarray, int]: labels = list(labels) # Iteratively process all the labels in chunks sized so less - # than _INT64_MAX unique int ids will be required for each chunk + # than lib.i8max unique int ids will be required for each chunk while True: # how many levels can be done without overflow: nlev = _int64_cut_off(lshape) @@ -215,7 +213,7 @@ def is_int64_overflow_possible(shape) -> bool: for x in shape: the_prod *= int(x) - return the_prod >= _INT64_MAX + return the_prod >= lib.i8max def decons_group_index(comp_labels, shape):