From 8fe487f728ba02232bf67817485c35888e631748 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 Nov 2023 16:00:30 -0700 Subject: [PATCH 1/3] COMPAT: Numpy int64 default for Numpy 2.0 --- pandas/compat/numpy/__init__.py | 3 ++- pandas/tests/extension/test_masked.py | 5 +++-- pandas/tests/frame/test_reductions.py | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index a1ac1024fc3e0..3014bd652d8c4 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -12,6 +12,7 @@ np_version_gte1p24 = _nlv >= Version("1.24") np_version_gte1p24p3 = _nlv >= Version("1.24.3") np_version_gte1p25 = _nlv >= Version("1.25") +np_version_gt2 = _nlv >= Version("2.0.0.dev0") is_numpy_dev = _nlv.dev is not None _min_numpy_ver = "1.22.4" @@ -27,7 +28,7 @@ np_long: type np_ulong: type -if _nlv >= Version("2.0.0.dev0"): +if np_version_gt2: try: with warnings.catch_warnings(): warnings.filterwarnings( diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index bd12bcced1448..0c8c46135b9b5 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -22,6 +22,7 @@ IS64, is_platform_windows, ) +from pandas.compat.numpy import np_version_gt2 import pandas as pd import pandas._testing as tm @@ -42,7 +43,7 @@ ) from pandas.tests.extension import base -is_windows_or_32bit = is_platform_windows() or not IS64 +is_windows_or_32bit = (is_platform_windows() and not np_version_gt2) or not IS64 pytestmark = [ pytest.mark.filterwarnings( @@ -312,7 +313,7 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool): # overwrite to ensure pd.NA is tested instead of np.nan # https://github.com/pandas-dev/pandas/issues/30958 length = 64 - if not IS64 or is_platform_windows(): + if is_windows_or_32bit: # Item "ExtensionDtype" of "Union[dtype[Any], ExtensionDtype]" has # no attribute "itemsize" if not ser.dtype.itemsize == 8: # type: ignore[union-attr] diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 5d1f0ae724d61..fbba6a83875b0 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -10,6 +10,7 @@ IS64, is_platform_windows, ) +from pandas.compat.numpy import np_version_gt2 import pandas.util._test_decorators as td import pandas as pd @@ -32,7 +33,7 @@ nanops, ) -is_windows_or_is32 = is_platform_windows() or not IS64 +is_windows_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64 def assert_stat_op_calc( From 96a31d06be9f924bb140ce8d6695e184353320a8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 4 Nov 2023 15:46:58 -0700 Subject: [PATCH 2/3] Fix conditions --- pandas/tests/extension/base/reduce.py | 4 ++-- pandas/tests/extension/test_arrow.py | 2 +- pandas/tests/extension/test_masked.py | 24 ++++++++++++++++++++---- pandas/tests/frame/test_reductions.py | 15 +++++++++------ 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index 1b94a635d1748..4477ba17c1683 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -40,7 +40,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): expected = exp_op(skipna=skipna) tm.assert_almost_equal(result, expected) - def _get_expected_reduction_dtype(self, arr, op_name: str): + def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): # Find the expected dtype when the given reduction is done on a DataFrame # column with this array. The default assumes float64-like behavior, # i.e. retains the dtype. @@ -59,7 +59,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): kwargs = {"ddof": 1} if op_name in ["var", "std"] else {} - cmp_dtype = self._get_expected_reduction_dtype(arr, op_name) + cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna) # The DataFrame method just calls arr._reduce with keepdims=True, # so this first check is perfunctory. diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index fe6bde65420f7..cb44453f55e5e 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -519,7 +519,7 @@ def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna, reque return super().test_reduce_series_boolean(data, all_boolean_reductions, skipna) - def _get_expected_reduction_dtype(self, arr, op_name: str): + def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): if op_name in ["max", "min"]: cmp_dtype = arr.dtype elif arr.dtype.name == "decimal128(7, 3)[pyarrow]": diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index 0c8c46135b9b5..a5e8b2ed1efe5 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -280,7 +280,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): expected = pd.NA tm.assert_almost_equal(result, expected) - def _get_expected_reduction_dtype(self, arr, op_name: str): + def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): if tm.is_float_dtype(arr.dtype): cmp_dtype = arr.dtype.name elif op_name in ["mean", "median", "var", "std", "skew"]: @@ -290,16 +290,32 @@ def _get_expected_reduction_dtype(self, arr, op_name: str): elif arr.dtype in ["Int64", "UInt64"]: cmp_dtype = arr.dtype.name elif tm.is_signed_integer_dtype(arr.dtype): - cmp_dtype = "Int32" if is_windows_or_32bit else "Int64" + # TODO: Why does Window Numpy 2.0 dtype depend on skipna? + cmp_dtype = ( + "Int32" + if (is_platform_windows() and (not np_version_gt2 or not skipna)) + or not IS64 + else "Int64" + ) elif tm.is_unsigned_integer_dtype(arr.dtype): - cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64" + cmp_dtype = ( + "UInt32" + if (is_platform_windows() and (not np_version_gt2 or not skipna)) + or not IS64 + else "UInt64" + ) elif arr.dtype.kind == "b": if op_name in ["mean", "median", "var", "std", "skew"]: cmp_dtype = "Float64" elif op_name in ["min", "max"]: cmp_dtype = "boolean" elif op_name in ["sum", "prod"]: - cmp_dtype = "Int32" if is_windows_or_32bit else "Int64" + cmp_dtype = ( + "Int32" + if (is_platform_windows() and (not np_version_gt2 or not skipna)) + or not IS64 + else "Int64" + ) else: raise TypeError("not supposed to reach this") else: diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index fbba6a83875b0..25d6dcaf53bfb 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -33,7 +33,8 @@ nanops, ) -is_windows_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64 +is_windows_np2_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64 +is_windows_or_is32 = is_platform_windows() or not IS64 def assert_stat_op_calc( @@ -1768,13 +1769,13 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype): @pytest.mark.parametrize( "opname, dtype, exp_value, exp_dtype", [ - ("sum", "Int8", 0, ("Int32" if is_windows_or_is32 else "Int64")), - ("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")), - ("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")), + ("sum", "Int8", 0, ("Int32" if is_windows_np2_or_is32 else "Int64")), + ("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")), + ("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")), ("sum", "Int64", 0, "Int64"), ("prod", "Int64", 1, "Int64"), - ("sum", "UInt8", 0, ("UInt32" if is_windows_or_is32 else "UInt64")), - ("prod", "UInt8", 1, ("UInt32" if is_windows_or_is32 else "UInt64")), + ("sum", "UInt8", 0, ("UInt32" if is_windows_np2_or_is32 else "UInt64")), + ("prod", "UInt8", 1, ("UInt32" if is_windows_np2_or_is32 else "UInt64")), ("sum", "UInt64", 0, "UInt64"), ("prod", "UInt64", 1, "UInt64"), ("sum", "Float32", 0, "Float32"), @@ -1789,6 +1790,8 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype expected = Series([exp_value, exp_value], dtype=exp_dtype) tm.assert_series_equal(result, expected) + # TODO: why does min_count=1 impact the resulting Windows dtype + # differently than min_count=0? @pytest.mark.parametrize( "opname, dtype, exp_dtype", [ From 3d2269cab76099d2e5d78a59a013d1d880caf0f3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 6 Nov 2023 09:49:22 -0800 Subject: [PATCH 3/3] Address test_reindex_tzaware_fill_value --- pandas/tests/frame/methods/test_reindex.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index fb6e08cd52d97..07bd2f1b7d99b 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -12,6 +12,7 @@ IS64, is_platform_windows, ) +from pandas.compat.numpy import np_version_gt2 import pandas.util._test_decorators as td import pandas as pd @@ -131,7 +132,7 @@ class TestDataFrameSelectReindex: # test_indexing @pytest.mark.xfail( - not IS64 or is_platform_windows(), + not IS64 or (is_platform_windows() and not np_version_gt2), reason="Passes int32 values to DatetimeArray in make_na_array on " "windows, 32bit linux builds", )