Skip to content

Commit 8a616b9

Browse files
Backport PR #55817 on branch 2.1.x (COMPAT: Numpy int64 Windows default for Numpy 2.0) (#55851)
Backport PR #55817: COMPAT: Numpy int64 Windows default for Numpy 2.0 Co-authored-by: Matthew Roeschke <[email protected]>
1 parent bf91684 commit 8a616b9

File tree

6 files changed

+39
-16
lines changed

6 files changed

+39
-16
lines changed

pandas/compat/numpy/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
np_version_gte1p24 = _nlv >= Version("1.24")
1212
np_version_gte1p24p3 = _nlv >= Version("1.24.3")
1313
np_version_gte1p25 = _nlv >= Version("1.25")
14+
np_version_gt2 = _nlv >= Version("2.0.0.dev0")
1415
is_numpy_dev = _nlv.dev is not None
1516
_min_numpy_ver = "1.22.4"
1617

@@ -26,7 +27,7 @@
2627
np_long: type
2728
np_ulong: type
2829

29-
if _nlv >= Version("2.0.0.dev0"):
30+
if np_version_gt2:
3031
try:
3132
with warnings.catch_warnings():
3233
warnings.filterwarnings(

pandas/tests/extension/base/reduce.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def check_reduce(self, s, op_name, skipna):
3939
expected = exp_op(skipna=skipna)
4040
tm.assert_almost_equal(result, expected)
4141

42-
def _get_expected_reduction_dtype(self, arr, op_name: str):
42+
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
4343
# Find the expected dtype when the given reduction is done on a DataFrame
4444
# column with this array. The default assumes float64-like behavior,
4545
# i.e. retains the dtype.
@@ -58,7 +58,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
5858

5959
kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}
6060

61-
cmp_dtype = self._get_expected_reduction_dtype(arr, op_name)
61+
cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna)
6262

6363
# The DataFrame method just calls arr._reduce with keepdims=True,
6464
# so this first check is perfunctory.

pandas/tests/extension/test_arrow.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ def test_reduce_series_boolean(
543543

544544
return super().test_reduce_series_boolean(data, all_boolean_reductions, skipna)
545545

546-
def _get_expected_reduction_dtype(self, arr, op_name: str):
546+
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
547547
if op_name in ["max", "min"]:
548548
cmp_dtype = arr.dtype
549549
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":

pandas/tests/extension/test_masked.py

+23-6
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
IS64,
2121
is_platform_windows,
2222
)
23+
from pandas.compat.numpy import np_version_gt2
2324

2425
import pandas as pd
2526
import pandas._testing as tm
@@ -40,7 +41,7 @@
4041
)
4142
from pandas.tests.extension import base
4243

43-
is_windows_or_32bit = is_platform_windows() or not IS64
44+
is_windows_or_32bit = (is_platform_windows() and not np_version_gt2) or not IS64
4445

4546
pytestmark = [
4647
pytest.mark.filterwarnings(
@@ -325,7 +326,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
325326
expected = pd.NA
326327
tm.assert_almost_equal(result, expected)
327328

328-
def _get_expected_reduction_dtype(self, arr, op_name: str):
329+
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
329330
if tm.is_float_dtype(arr.dtype):
330331
cmp_dtype = arr.dtype.name
331332
elif op_name in ["mean", "median", "var", "std", "skew"]:
@@ -335,16 +336,32 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
335336
elif arr.dtype in ["Int64", "UInt64"]:
336337
cmp_dtype = arr.dtype.name
337338
elif tm.is_signed_integer_dtype(arr.dtype):
338-
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
339+
# TODO: Why does Windows Numpy 2.0 dtype depend on skipna?
340+
cmp_dtype = (
341+
"Int32"
342+
if (is_platform_windows() and (not np_version_gt2 or not skipna))
343+
or not IS64
344+
else "Int64"
345+
)
339346
elif tm.is_unsigned_integer_dtype(arr.dtype):
340-
cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64"
347+
cmp_dtype = (
348+
"UInt32"
349+
if (is_platform_windows() and (not np_version_gt2 or not skipna))
350+
or not IS64
351+
else "UInt64"
352+
)
341353
elif arr.dtype.kind == "b":
342354
if op_name in ["mean", "median", "var", "std", "skew"]:
343355
cmp_dtype = "Float64"
344356
elif op_name in ["min", "max"]:
345357
cmp_dtype = "boolean"
346358
elif op_name in ["sum", "prod"]:
347-
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
359+
cmp_dtype = (
360+
"Int32"
361+
if (is_platform_windows() and (not np_version_gt2 or not skipna))
362+
or not IS64
363+
else "Int64"
364+
)
348365
else:
349366
raise TypeError("not supposed to reach this")
350367
else:
@@ -360,7 +377,7 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
360377
# overwrite to ensure pd.NA is tested instead of np.nan
361378
# https://github.com/pandas-dev/pandas/issues/30958
362379
length = 64
363-
if not IS64 or is_platform_windows():
380+
if is_windows_or_32bit:
364381
# Item "ExtensionDtype" of "Union[dtype[Any], ExtensionDtype]" has
365382
# no attribute "itemsize"
366383
if not ser.dtype.itemsize == 8: # type: ignore[union-attr]

pandas/tests/frame/methods/test_reindex.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
IS64,
1313
is_platform_windows,
1414
)
15+
from pandas.compat.numpy import np_version_gt2
1516
import pandas.util._test_decorators as td
1617

1718
import pandas as pd
@@ -131,7 +132,7 @@ class TestDataFrameSelectReindex:
131132
# test_indexing
132133

133134
@pytest.mark.xfail(
134-
not IS64 or is_platform_windows(),
135+
not IS64 or (is_platform_windows() and not np_version_gt2),
135136
reason="Passes int32 values to DatetimeArray in make_na_array on "
136137
"windows, 32bit linux builds",
137138
)

pandas/tests/frame/test_reductions.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
IS64,
1111
is_platform_windows,
1212
)
13+
from pandas.compat.numpy import np_version_gt2
1314
import pandas.util._test_decorators as td
1415

1516
import pandas as pd
@@ -32,6 +33,7 @@
3233
nanops,
3334
)
3435

36+
is_windows_np2_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64
3537
is_windows_or_is32 = is_platform_windows() or not IS64
3638

3739

@@ -1766,13 +1768,13 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype):
17661768
@pytest.mark.parametrize(
17671769
"opname, dtype, exp_value, exp_dtype",
17681770
[
1769-
("sum", "Int8", 0, ("Int32" if is_windows_or_is32 else "Int64")),
1770-
("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
1771-
("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
1771+
("sum", "Int8", 0, ("Int32" if is_windows_np2_or_is32 else "Int64")),
1772+
("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
1773+
("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
17721774
("sum", "Int64", 0, "Int64"),
17731775
("prod", "Int64", 1, "Int64"),
1774-
("sum", "UInt8", 0, ("UInt32" if is_windows_or_is32 else "UInt64")),
1775-
("prod", "UInt8", 1, ("UInt32" if is_windows_or_is32 else "UInt64")),
1776+
("sum", "UInt8", 0, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
1777+
("prod", "UInt8", 1, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
17761778
("sum", "UInt64", 0, "UInt64"),
17771779
("prod", "UInt64", 1, "UInt64"),
17781780
("sum", "Float32", 0, "Float32"),
@@ -1787,6 +1789,8 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype
17871789
expected = Series([exp_value, exp_value], dtype=exp_dtype)
17881790
tm.assert_series_equal(result, expected)
17891791

1792+
# TODO: why does min_count=1 impact the resulting Windows dtype
1793+
# differently than min_count=0?
17901794
@pytest.mark.parametrize(
17911795
"opname, dtype, exp_dtype",
17921796
[

0 commit comments

Comments
 (0)