Skip to content

Commit e0e47e8

Browse files
mroeschke and lithomas1
authored
DEPS: Test NEP 50 (#55739)
* DEPS: Test NEP 50 * Use Python floats in test_maybe_promote_float_with_float * Refactor test_to_html_multiindex to allow tests to collect * Suppress DeprecationWarning for now * Use old invocation * Use Python ints in _ranges.py functions * Address test_constructor * Fix test_constructor_coercion_signed_to_unsigned * Fix test_constructor_coercion_signed_to_unsigned * Cast numpy scalars as python scalars before arith ops * add xfail reason to TestCoercionFloat32 * only set promotion state for numpy > 2.0 * order was backwards * Version promotion state call * fix timedelta tests * go for green * fix non npdev too? * fixes * adjust xfail condition * go for green * add tests * add negative numbers test * updates * fix accidental changes * more * simplify * linter --------- Co-authored-by: Thomas Li <[email protected]>
1 parent 0d8a0f3 commit e0e47e8

File tree

15 files changed

+177
-39
lines changed

15 files changed

+177
-39
lines changed

.github/workflows/unit-tests.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ jobs:
9292
- name: "Numpy Dev"
9393
env_file: actions-311-numpydev.yaml
9494
pattern: "not slow and not network and not single_cpu"
95-
test_args: "-W error::DeprecationWarning -W error::FutureWarning"
95+
test_args: "-W error::FutureWarning"
9696
- name: "Pyarrow Nightly"
9797
env_file: actions-311-pyarrownightly.yaml
9898
pattern: "not slow and not network and not single_cpu"
@@ -115,6 +115,7 @@ jobs:
115115
TEST_ARGS: ${{ matrix.test_args || '' }}
116116
PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }}
117117
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
118+
NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
118119
# Clipboard tests
119120
QT_QPA_PLATFORM: offscreen
120121
concurrency:

ci/run_tests.sh

+1-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED
1010

1111
COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml"
1212

13-
# TODO: Support NEP 50 and remove NPY_PROMOTION_STATE
14-
PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"
13+
PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"
1514

1615
if [[ "$PATTERN" ]]; then
1716
PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""

pandas/_libs/tslibs/timedeltas.pyx

+12
Original file line numberDiff line numberDiff line change
@@ -2060,6 +2060,12 @@ class Timedelta(_Timedelta):
20602060
# integers or floats
20612061
if util.is_nan(other):
20622062
return NaT
2063+
# We want NumPy numeric scalars to behave like Python scalars
2064+
# post NEP 50
2065+
if isinstance(other, cnp.integer):
2066+
other = int(other)
2067+
if isinstance(other, cnp.floating):
2068+
other = float(other)
20632069
return Timedelta._from_value_and_reso(
20642070
<int64_t>(self._value/ other), self._creso
20652071
)
@@ -2114,6 +2120,12 @@ class Timedelta(_Timedelta):
21142120
elif is_integer_object(other) or is_float_object(other):
21152121
if util.is_nan(other):
21162122
return NaT
2123+
# We want NumPy numeric scalars to behave like Python scalars
2124+
# post NEP 50
2125+
if isinstance(other, cnp.integer):
2126+
other = int(other)
2127+
if isinstance(other, cnp.floating):
2128+
other = float(other)
21172129
return type(self)._from_value_and_reso(self._value// other, self._creso)
21182130

21192131
elif is_array(other):

pandas/core/arrays/_ranges.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ def generate_regular_range(
5454
iend = end._value if end is not None else None
5555
freq.nanos # raises if non-fixed frequency
5656
td = Timedelta(freq)
57-
b: int | np.int64 | np.uint64
58-
e: int | np.int64 | np.uint64
57+
b: int
58+
e: int
5959
try:
6060
td = td.as_unit(unit, round_ok=False)
6161
except ValueError as err:
@@ -96,7 +96,7 @@ def generate_regular_range(
9696

9797
def _generate_range_overflow_safe(
9898
endpoint: int, periods: int, stride: int, side: str = "start"
99-
) -> np.int64 | np.uint64:
99+
) -> int:
100100
"""
101101
Calculate the second endpoint for passing to np.arange, checking
102102
to avoid an integer overflow. Catch OverflowError and re-raise
@@ -115,7 +115,7 @@ def _generate_range_overflow_safe(
115115
116116
Returns
117117
-------
118-
other_end : np.int64 | np.uint64
118+
other_end : int
119119
120120
Raises
121121
------
@@ -163,7 +163,7 @@ def _generate_range_overflow_safe(
163163

164164
def _generate_range_overflow_safe_signed(
165165
endpoint: int, periods: int, stride: int, side: str
166-
) -> np.int64 | np.uint64:
166+
) -> int:
167167
"""
168168
A special case for _generate_range_overflow_safe where `periods * stride`
169169
can be calculated without overflowing int64 bounds.
@@ -181,7 +181,7 @@ def _generate_range_overflow_safe_signed(
181181
# Putting this into a DatetimeArray/TimedeltaArray
182182
# would incorrectly be interpreted as NaT
183183
raise OverflowError
184-
return result
184+
return int(result)
185185
except (FloatingPointError, OverflowError):
186186
# with endpoint negative and addend positive we risk
187187
# FloatingPointError; with reversed signed we risk OverflowError
@@ -200,7 +200,7 @@ def _generate_range_overflow_safe_signed(
200200
i64max = np.uint64(i8max)
201201
assert uresult > i64max
202202
if uresult <= i64max + np.uint64(stride):
203-
return uresult
203+
return int(uresult)
204204

205205
raise OutOfBoundsDatetime(
206206
f"Cannot generate range with {side}={endpoint} and periods={periods}"

pandas/core/dtypes/cast.py

+34-5
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
is_supported_dtype,
4040
)
4141
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
42+
from pandas.compat.numpy import np_version_gt2
4243
from pandas.errors import (
4344
IntCastingNaNError,
4445
LossySetitemError,
@@ -1314,6 +1315,30 @@ def find_result_type(left_dtype: DtypeObj, right: Any) -> DtypeObj:
13141315
# which will make us upcast too far.
13151316
if lib.is_float(right) and right.is_integer() and left_dtype.kind != "f":
13161317
right = int(right)
1318+
# After NEP 50, numpy won't inspect Python scalars
1319+
# TODO: do we need to recreate numpy's inspection logic for floats too
1320+
# (this breaks some tests)
1321+
if isinstance(right, int) and not isinstance(right, np.integer):
1322+
# This gives an unsigned type by default
1323+
# (if our number is positive)
1324+
1325+
# If our left dtype is signed, we might not want this since
1326+
# this might give us 1 dtype too big
1327+
# We should check if the corresponding int dtype (e.g. int64 for uint64)
1328+
# can hold the number
1329+
right_dtype = np.min_scalar_type(right)
1330+
if right == 0:
1331+
# Special case 0
1332+
right = left_dtype
1333+
elif (
1334+
not np.issubdtype(left_dtype, np.unsignedinteger)
1335+
and 0 < right <= 2 ** (8 * right_dtype.itemsize - 1) - 1
1336+
):
1337+
# If left dtype isn't unsigned, check if it fits in the signed dtype
1338+
right = np.dtype(f"i{right_dtype.itemsize}")
1339+
else:
1340+
right = right_dtype
1341+
13171342
new_dtype = np.result_type(left_dtype, right)
13181343

13191344
elif is_valid_na_for_dtype(right, left_dtype):
@@ -1619,11 +1644,13 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
16191644
with warnings.catch_warnings():
16201645
# We already disallow dtype=uint w/ negative numbers
16211646
# (test_constructor_coercion_signed_to_unsigned) so safe to ignore.
1622-
warnings.filterwarnings(
1623-
"ignore",
1624-
"NumPy will stop allowing conversion of out-of-bound Python int",
1625-
DeprecationWarning,
1626-
)
1647+
if not np_version_gt2:
1648+
warnings.filterwarnings(
1649+
"ignore",
1650+
"NumPy will stop allowing conversion of "
1651+
"out-of-bound Python int",
1652+
DeprecationWarning,
1653+
)
16271654
casted = np.array(arr, dtype=dtype, copy=False)
16281655
else:
16291656
with warnings.catch_warnings():
@@ -1660,6 +1687,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
16601687
raise ValueError(f"string values cannot be losslessly cast to {dtype}")
16611688

16621689
if dtype.kind == "u" and (arr < 0).any():
1690+
# TODO: can this be hit anymore after numpy 2.0?
16631691
raise OverflowError("Trying to coerce negative values to unsigned integers")
16641692

16651693
if arr.dtype.kind == "f":
@@ -1672,6 +1700,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
16721700
raise ValueError("Trying to coerce float values to integers")
16731701

16741702
if casted.dtype < arr.dtype:
1703+
# TODO: can this path still be hit with numpy >= 2.0?
16751704
# GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
16761705
raise ValueError(
16771706
f"Values are too large to be losslessly converted to {dtype}. "

pandas/core/ops/array_ops.py

+8
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,14 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
570570
# np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
571571
return Timedelta(obj)
572572

573+
# We want NumPy numeric scalars to behave like Python scalars
574+
# post NEP 50
575+
elif isinstance(obj, np.integer):
576+
return int(obj)
577+
578+
elif isinstance(obj, np.floating):
579+
return float(obj)
580+
573581
return obj
574582

575583

pandas/tests/dtypes/cast/test_promote.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -229,24 +229,24 @@ def test_maybe_promote_float_with_int(float_numpy_dtype, any_int_numpy_dtype):
229229
[
230230
# float filled with float
231231
("float32", 1, "float32"),
232-
("float32", np.finfo("float32").max * 1.1, "float64"),
232+
("float32", float(np.finfo("float32").max) * 1.1, "float64"),
233233
("float64", 1, "float64"),
234-
("float64", np.finfo("float32").max * 1.1, "float64"),
234+
("float64", float(np.finfo("float32").max) * 1.1, "float64"),
235235
# complex filled with float
236236
("complex64", 1, "complex64"),
237-
("complex64", np.finfo("float32").max * 1.1, "complex128"),
237+
("complex64", float(np.finfo("float32").max) * 1.1, "complex128"),
238238
("complex128", 1, "complex128"),
239-
("complex128", np.finfo("float32").max * 1.1, "complex128"),
239+
("complex128", float(np.finfo("float32").max) * 1.1, "complex128"),
240240
# float filled with complex
241241
("float32", 1 + 1j, "complex64"),
242-
("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"),
242+
("float32", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
243243
("float64", 1 + 1j, "complex128"),
244-
("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"),
244+
("float64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
245245
# complex filled with complex
246246
("complex64", 1 + 1j, "complex64"),
247-
("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"),
247+
("complex64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
248248
("complex128", 1 + 1j, "complex128"),
249-
("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"),
249+
("complex128", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
250250
],
251251
)
252252
def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype):

pandas/tests/dtypes/test_inference.py

+50
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,10 @@
3333
missing as libmissing,
3434
ops as libops,
3535
)
36+
from pandas.compat.numpy import np_version_gt2
3637

3738
from pandas.core.dtypes import inference
39+
from pandas.core.dtypes.cast import find_result_type
3840
from pandas.core.dtypes.common import (
3941
ensure_int32,
4042
is_bool,
@@ -1995,3 +1997,51 @@ def test_ensure_int32():
19951997
values = np.arange(10, dtype=np.int64)
19961998
result = ensure_int32(values)
19971999
assert result.dtype == np.int32
2000+
2001+
2002+
@pytest.mark.parametrize(
2003+
"right,result",
2004+
[
2005+
(0, np.uint8),
2006+
(-1, np.int16),
2007+
(300, np.uint16),
2008+
# For floats, we just upcast directly to float64 instead of trying to
2009+
# find a smaller floating dtype
2010+
(300.0, np.uint16), # for integer floats, we convert them to ints
2011+
(300.1, np.float64),
2012+
(np.int16(300), np.int16 if np_version_gt2 else np.uint16),
2013+
],
2014+
)
2015+
def test_find_result_type_uint_int(right, result):
2016+
left_dtype = np.dtype("uint8")
2017+
assert find_result_type(left_dtype, right) == result
2018+
2019+
2020+
@pytest.mark.parametrize(
2021+
"right,result",
2022+
[
2023+
(0, np.int8),
2024+
(-1, np.int8),
2025+
(300, np.int16),
2026+
# For floats, we just upcast directly to float64 instead of trying to
2027+
# find a smaller floating dtype
2028+
(300.0, np.int16), # for integer floats, we convert them to ints
2029+
(300.1, np.float64),
2030+
(np.int16(300), np.int16),
2031+
],
2032+
)
2033+
def test_find_result_type_int_int(right, result):
2034+
left_dtype = np.dtype("int8")
2035+
assert find_result_type(left_dtype, right) == result
2036+
2037+
2038+
@pytest.mark.parametrize(
2039+
"right,result",
2040+
[
2041+
(300.0, np.float64),
2042+
(np.float32(300), np.float32),
2043+
],
2044+
)
2045+
def test_find_result_type_floats(right, result):
2046+
left_dtype = np.dtype("float16")
2047+
assert find_result_type(left_dtype, right) == result

pandas/tests/indexes/numeric/test_numeric.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -354,11 +354,13 @@ def test_constructor(self, dtype):
354354
arr = index.values.copy()
355355
new_index = index_cls(arr, copy=True)
356356
tm.assert_index_equal(new_index, index, exact=True)
357-
val = arr[0] + 3000
357+
val = int(arr[0]) + 3000
358358

359359
# this should not change index
360-
arr[0] = val
361-
assert new_index[0] != val
360+
if dtype != np.int8:
361+
# NEP 50 won't allow assignment that would overflow
362+
arr[0] = val
363+
assert new_index[0] != val
362364

363365
if dtype == np.int64:
364366
# pass list, coerce fine
@@ -407,8 +409,12 @@ def test_constructor_coercion_signed_to_unsigned(
407409
any_unsigned_int_numpy_dtype,
408410
):
409411
# see gh-15832
410-
msg = "Trying to coerce negative values to unsigned integers"
411-
412+
msg = "|".join(
413+
[
414+
"Trying to coerce negative values to unsigned integers",
415+
"The elements provided in the data cannot all be casted",
416+
]
417+
)
412418
with pytest.raises(OverflowError, match=msg):
413419
Index([-1], dtype=any_unsigned_int_numpy_dtype)
414420

pandas/tests/indexing/test_coercion.py

+7
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
IS64,
1616
is_platform_windows,
1717
)
18+
from pandas.compat.numpy import np_version_gt2
1819

1920
import pandas as pd
2021
import pandas._testing as tm
@@ -226,6 +227,8 @@ def test_insert_int_index(
226227
"insert, coerced_val, coerced_dtype",
227228
[
228229
(1, 1.0, None),
230+
# When float_numpy_dtype=float32, this is not the case
231+
# see the correction below
229232
(1.1, 1.1, np.float64),
230233
(False, False, object), # GH#36319
231234
("x", "x", object),
@@ -238,6 +241,10 @@ def test_insert_float_index(
238241
obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype)
239242
coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype
240243

244+
if np_version_gt2 and dtype == "float32" and coerced_val == 1.1:
245+
# Hack: in the 2nd test case, with numpy >= 2 (presumably NEP 50 weak promotion) 1.1 is accepted as a float32 value, so
246+
# the expected dtype will be float32 if the original dtype was float32
247+
coerced_dtype = np.float32
241248
exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype)
242249
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
243250

pandas/tests/indexing/test_loc.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from pandas._config import using_pyarrow_string_dtype
1616

1717
from pandas._libs import index as libindex
18+
from pandas.compat.numpy import np_version_gt2
1819
from pandas.errors import IndexingError
1920
import pandas.util._test_decorators as td
2021

@@ -3020,7 +3021,15 @@ def test_loc_setitem_uint8_upcast(value):
30203021
with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"):
30213022
df.loc[2, "col1"] = value # value that can't be held in uint8
30223023

3023-
expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16")
3024+
if np_version_gt2 and isinstance(value, np.int16):
3025+
# Note, result type of uint8 + int16 is int16
3026+
# in numpy < 2, though, numpy would inspect the
3027+
# value and see that it could fit in a uint16, resulting in a uint16
3028+
dtype = "int16"
3029+
else:
3030+
dtype = "uint16"
3031+
3032+
expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype=dtype)
30243033
tm.assert_frame_equal(df, expected)
30253034

30263035

pandas/tests/io/formats/test_to_html.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -419,15 +419,15 @@ def test_to_html_columns_arg(float_frame):
419419
"columns,justify,expected",
420420
[
421421
(
422-
MultiIndex.from_tuples(
423-
list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))),
422+
MultiIndex.from_arrays(
423+
[np.arange(2).repeat(2), np.mod(range(4), 2)],
424424
names=["CL0", "CL1"],
425425
),
426426
"left",
427427
"multiindex_1",
428428
),
429429
(
430-
MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))),
430+
MultiIndex.from_arrays([np.arange(4), np.mod(range(4), 2)]),
431431
"right",
432432
"multiindex_2",
433433
),

0 commit comments

Comments
 (0)