Skip to content

Commit ef52fea

Browse files
authored
COMPAT: Numpy int64 Windows default for Numpy 2.0 (#55817)
* COMPAT: Numpy int64 default for Numpy 2.0
* Fix conditions
* Address test_reindex_tzaware_fill_value
1 parent 2c12853 commit ef52fea

File tree

6 files changed

+39
-16
lines changed

6 files changed

+39
-16
lines changed

pandas/compat/numpy/__init__.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
np_version_gte1p24 = _nlv >= Version("1.24")
1313
np_version_gte1p24p3 = _nlv >= Version("1.24.3")
1414
np_version_gte1p25 = _nlv >= Version("1.25")
15+
np_version_gt2 = _nlv >= Version("2.0.0.dev0")
1516
is_numpy_dev = _nlv.dev is not None
1617
_min_numpy_ver = "1.22.4"
1718

@@ -27,7 +28,7 @@
2728
np_long: type
2829
np_ulong: type
2930

30-
if _nlv >= Version("2.0.0.dev0"):
31+
if np_version_gt2:
3132
try:
3233
with warnings.catch_warnings():
3334
warnings.filterwarnings(

pandas/tests/extension/base/reduce.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
4040
expected = exp_op(skipna=skipna)
4141
tm.assert_almost_equal(result, expected)
4242

43-
def _get_expected_reduction_dtype(self, arr, op_name: str):
43+
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
4444
# Find the expected dtype when the given reduction is done on a DataFrame
4545
# column with this array. The default assumes float64-like behavior,
4646
# i.e. retains the dtype.
@@ -59,7 +59,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
5959

6060
kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}
6161

62-
cmp_dtype = self._get_expected_reduction_dtype(arr, op_name)
62+
cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna)
6363

6464
# The DataFrame method just calls arr._reduce with keepdims=True,
6565
# so this first check is perfunctory.

pandas/tests/extension/test_arrow.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -519,7 +519,7 @@ def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna, reque
519519

520520
return super().test_reduce_series_boolean(data, all_boolean_reductions, skipna)
521521

522-
def _get_expected_reduction_dtype(self, arr, op_name: str):
522+
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
523523
if op_name in ["max", "min"]:
524524
cmp_dtype = arr.dtype
525525
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":

pandas/tests/extension/test_masked.py

+23-6
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
IS64,
2323
is_platform_windows,
2424
)
25+
from pandas.compat.numpy import np_version_gt2
2526

2627
import pandas as pd
2728
import pandas._testing as tm
@@ -42,7 +43,7 @@
4243
)
4344
from pandas.tests.extension import base
4445

45-
is_windows_or_32bit = is_platform_windows() or not IS64
46+
is_windows_or_32bit = (is_platform_windows() and not np_version_gt2) or not IS64
4647

4748
pytestmark = [
4849
pytest.mark.filterwarnings(
@@ -279,7 +280,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
279280
expected = pd.NA
280281
tm.assert_almost_equal(result, expected)
281282

282-
def _get_expected_reduction_dtype(self, arr, op_name: str):
283+
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
283284
if tm.is_float_dtype(arr.dtype):
284285
cmp_dtype = arr.dtype.name
285286
elif op_name in ["mean", "median", "var", "std", "skew"]:
@@ -289,16 +290,32 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
289290
elif arr.dtype in ["Int64", "UInt64"]:
290291
cmp_dtype = arr.dtype.name
291292
elif tm.is_signed_integer_dtype(arr.dtype):
292-
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
293+
# TODO: Why does the Windows NumPy 2.0 dtype depend on skipna?
294+
cmp_dtype = (
295+
"Int32"
296+
if (is_platform_windows() and (not np_version_gt2 or not skipna))
297+
or not IS64
298+
else "Int64"
299+
)
293300
elif tm.is_unsigned_integer_dtype(arr.dtype):
294-
cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64"
301+
cmp_dtype = (
302+
"UInt32"
303+
if (is_platform_windows() and (not np_version_gt2 or not skipna))
304+
or not IS64
305+
else "UInt64"
306+
)
295307
elif arr.dtype.kind == "b":
296308
if op_name in ["mean", "median", "var", "std", "skew"]:
297309
cmp_dtype = "Float64"
298310
elif op_name in ["min", "max"]:
299311
cmp_dtype = "boolean"
300312
elif op_name in ["sum", "prod"]:
301-
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
313+
cmp_dtype = (
314+
"Int32"
315+
if (is_platform_windows() and (not np_version_gt2 or not skipna))
316+
or not IS64
317+
else "Int64"
318+
)
302319
else:
303320
raise TypeError("not supposed to reach this")
304321
else:
@@ -312,7 +329,7 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
312329
# overwrite to ensure pd.NA is tested instead of np.nan
313330
# https://github.com/pandas-dev/pandas/issues/30958
314331
length = 64
315-
if not IS64 or is_platform_windows():
332+
if is_windows_or_32bit:
316333
# Item "ExtensionDtype" of "Union[dtype[Any], ExtensionDtype]" has
317334
# no attribute "itemsize"
318335
if not ser.dtype.itemsize == 8: # type: ignore[union-attr]

pandas/tests/frame/methods/test_reindex.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
IS64,
1313
is_platform_windows,
1414
)
15+
from pandas.compat.numpy import np_version_gt2
1516
import pandas.util._test_decorators as td
1617

1718
import pandas as pd
@@ -131,7 +132,7 @@ class TestDataFrameSelectReindex:
131132
# test_indexing
132133

133134
@pytest.mark.xfail(
134-
not IS64 or is_platform_windows(),
135+
not IS64 or (is_platform_windows() and not np_version_gt2),
135136
reason="Passes int32 values to DatetimeArray in make_na_array on "
136137
"windows, 32bit linux builds",
137138
)

pandas/tests/frame/test_reductions.py

+9-5
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
IS64,
1111
is_platform_windows,
1212
)
13+
from pandas.compat.numpy import np_version_gt2
1314
import pandas.util._test_decorators as td
1415

1516
import pandas as pd
@@ -32,6 +33,7 @@
3233
nanops,
3334
)
3435

36+
is_windows_np2_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64
3537
is_windows_or_is32 = is_platform_windows() or not IS64
3638

3739

@@ -1767,13 +1769,13 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype):
17671769
@pytest.mark.parametrize(
17681770
"opname, dtype, exp_value, exp_dtype",
17691771
[
1770-
("sum", "Int8", 0, ("Int32" if is_windows_or_is32 else "Int64")),
1771-
("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
1772-
("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
1772+
("sum", "Int8", 0, ("Int32" if is_windows_np2_or_is32 else "Int64")),
1773+
("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
1774+
("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
17731775
("sum", "Int64", 0, "Int64"),
17741776
("prod", "Int64", 1, "Int64"),
1775-
("sum", "UInt8", 0, ("UInt32" if is_windows_or_is32 else "UInt64")),
1776-
("prod", "UInt8", 1, ("UInt32" if is_windows_or_is32 else "UInt64")),
1777+
("sum", "UInt8", 0, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
1778+
("prod", "UInt8", 1, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
17771779
("sum", "UInt64", 0, "UInt64"),
17781780
("prod", "UInt64", 1, "UInt64"),
17791781
("sum", "Float32", 0, "Float32"),
@@ -1788,6 +1790,8 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype
17881790
expected = Series([exp_value, exp_value], dtype=exp_dtype)
17891791
tm.assert_series_equal(result, expected)
17901792

1793+
# TODO: why does min_count=1 impact the resulting Windows dtype
1794+
# differently than min_count=0?
17911795
@pytest.mark.parametrize(
17921796
"opname, dtype, exp_dtype",
17931797
[

0 commit comments

Comments
 (0)