Skip to content

Backport PR #55817 on branch 2.1.x (COMPAT: Numpy int64 Windows default for Numpy 2.0) #55851

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/compat/numpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
np_version_gte1p24 = _nlv >= Version("1.24")
np_version_gte1p24p3 = _nlv >= Version("1.24.3")
np_version_gte1p25 = _nlv >= Version("1.25")
np_version_gt2 = _nlv >= Version("2.0.0.dev0")
is_numpy_dev = _nlv.dev is not None
_min_numpy_ver = "1.22.4"

Expand All @@ -26,7 +27,7 @@
np_long: type
np_ulong: type

if _nlv >= Version("2.0.0.dev0"):
if np_version_gt2:
try:
with warnings.catch_warnings():
warnings.filterwarnings(
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/base/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def check_reduce(self, s, op_name, skipna):
expected = exp_op(skipna=skipna)
tm.assert_almost_equal(result, expected)

def _get_expected_reduction_dtype(self, arr, op_name: str):
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
# Find the expected dtype when the given reduction is done on a DataFrame
# column with this array. The default assumes float64-like behavior,
# i.e. retains the dtype.
Expand All @@ -58,7 +58,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):

kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}

cmp_dtype = self._get_expected_reduction_dtype(arr, op_name)
cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna)

# The DataFrame method just calls arr._reduce with keepdims=True,
# so this first check is perfunctory.
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ def test_reduce_series_boolean(

return super().test_reduce_series_boolean(data, all_boolean_reductions, skipna)

def _get_expected_reduction_dtype(self, arr, op_name: str):
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
if op_name in ["max", "min"]:
cmp_dtype = arr.dtype
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
Expand Down
29 changes: 23 additions & 6 deletions pandas/tests/extension/test_masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
IS64,
is_platform_windows,
)
from pandas.compat.numpy import np_version_gt2

import pandas as pd
import pandas._testing as tm
Expand All @@ -40,7 +41,7 @@
)
from pandas.tests.extension import base

is_windows_or_32bit = is_platform_windows() or not IS64
is_windows_or_32bit = (is_platform_windows() and not np_version_gt2) or not IS64

pytestmark = [
pytest.mark.filterwarnings(
Expand Down Expand Up @@ -325,7 +326,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
expected = pd.NA
tm.assert_almost_equal(result, expected)

def _get_expected_reduction_dtype(self, arr, op_name: str):
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
if tm.is_float_dtype(arr.dtype):
cmp_dtype = arr.dtype.name
elif op_name in ["mean", "median", "var", "std", "skew"]:
Expand All @@ -335,16 +336,32 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
elif arr.dtype in ["Int64", "UInt64"]:
cmp_dtype = arr.dtype.name
elif tm.is_signed_integer_dtype(arr.dtype):
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
# TODO: Why does the Windows NumPy 2.0 dtype depend on skipna?
cmp_dtype = (
"Int32"
if (is_platform_windows() and (not np_version_gt2 or not skipna))
or not IS64
else "Int64"
)
elif tm.is_unsigned_integer_dtype(arr.dtype):
cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64"
cmp_dtype = (
"UInt32"
if (is_platform_windows() and (not np_version_gt2 or not skipna))
or not IS64
else "UInt64"
)
elif arr.dtype.kind == "b":
if op_name in ["mean", "median", "var", "std", "skew"]:
cmp_dtype = "Float64"
elif op_name in ["min", "max"]:
cmp_dtype = "boolean"
elif op_name in ["sum", "prod"]:
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
cmp_dtype = (
"Int32"
if (is_platform_windows() and (not np_version_gt2 or not skipna))
or not IS64
else "Int64"
)
else:
raise TypeError("not supposed to reach this")
else:
Expand All @@ -360,7 +377,7 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
# overwrite to ensure pd.NA is tested instead of np.nan
# https://github.com/pandas-dev/pandas/issues/30958
length = 64
if not IS64 or is_platform_windows():
if is_windows_or_32bit:
# Item "ExtensionDtype" of "Union[dtype[Any], ExtensionDtype]" has
# no attribute "itemsize"
if not ser.dtype.itemsize == 8: # type: ignore[union-attr]
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
IS64,
is_platform_windows,
)
from pandas.compat.numpy import np_version_gt2
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -131,7 +132,7 @@ class TestDataFrameSelectReindex:
# test_indexing

@pytest.mark.xfail(
not IS64 or is_platform_windows(),
not IS64 or (is_platform_windows() and not np_version_gt2),
reason="Passes int32 values to DatetimeArray in make_na_array on "
"windows, 32bit linux builds",
)
Expand Down
14 changes: 9 additions & 5 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
IS64,
is_platform_windows,
)
from pandas.compat.numpy import np_version_gt2
import pandas.util._test_decorators as td

import pandas as pd
Expand All @@ -32,6 +33,7 @@
nanops,
)

is_windows_np2_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64
is_windows_or_is32 = is_platform_windows() or not IS64


Expand Down Expand Up @@ -1766,13 +1768,13 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype):
@pytest.mark.parametrize(
"opname, dtype, exp_value, exp_dtype",
[
("sum", "Int8", 0, ("Int32" if is_windows_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
("sum", "Int8", 0, ("Int32" if is_windows_np2_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
("sum", "Int64", 0, "Int64"),
("prod", "Int64", 1, "Int64"),
("sum", "UInt8", 0, ("UInt32" if is_windows_or_is32 else "UInt64")),
("prod", "UInt8", 1, ("UInt32" if is_windows_or_is32 else "UInt64")),
("sum", "UInt8", 0, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
("prod", "UInt8", 1, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
("sum", "UInt64", 0, "UInt64"),
("prod", "UInt64", 1, "UInt64"),
("sum", "Float32", 0, "Float32"),
Expand All @@ -1787,6 +1789,8 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype
expected = Series([exp_value, exp_value], dtype=exp_dtype)
tm.assert_series_equal(result, expected)

# TODO: why does min_count=1 impact the resulting Windows dtype
# differently than min_count=0?
@pytest.mark.parametrize(
"opname, dtype, exp_dtype",
[
Expand Down