Backport PR #55817 on branch 2.1.x (COMPAT: Numpy int64 Windows default for Numpy 2.0) (#55851)

meeseeksmachine · mroeschke · web-flow · commit 8a616b901f1f · 2023-11-06T14:05:51.000-08:00
Backport PR #55817: COMPAT: Numpy int64 Windows default for Numpy 2.0 Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py
@@ -11,6 +11,7 @@
 np_version_gte1p24 = _nlv >= Version("1.24")
 np_version_gte1p24p3 = _nlv >= Version("1.24.3")
 np_version_gte1p25 = _nlv >= Version("1.25")
+np_version_gt2 = _nlv >= Version("2.0.0.dev0")
 is_numpy_dev = _nlv.dev is not None
 _min_numpy_ver = "1.22.4"
 
@@ -26,7 +27,7 @@
 np_long: type
 np_ulong: type
 
-if _nlv >= Version("2.0.0.dev0"):
+if np_version_gt2:
     try:
         with warnings.catch_warnings():
             warnings.filterwarnings(
diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py
@@ -39,7 +39,7 @@ def check_reduce(self, s, op_name, skipna):
             expected = exp_op(skipna=skipna)
         tm.assert_almost_equal(result, expected)
 
-    def _get_expected_reduction_dtype(self, arr, op_name: str):
+    def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
         # Find the expected dtype when the given reduction is done on a DataFrame
         # column with this array.  The default assumes float64-like behavior,
         # i.e. retains the dtype.
@@ -58,7 +58,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
 
         kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}
 
-        cmp_dtype = self._get_expected_reduction_dtype(arr, op_name)
+        cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna)
 
         # The DataFrame method just calls arr._reduce with keepdims=True,
         #  so this first check is perfunctory.
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -543,7 +543,7 @@ def test_reduce_series_boolean(
 
         return super().test_reduce_series_boolean(data, all_boolean_reductions, skipna)
 
-    def _get_expected_reduction_dtype(self, arr, op_name: str):
+    def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
         if op_name in ["max", "min"]:
             cmp_dtype = arr.dtype
         elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py
@@ -20,6 +20,7 @@
     IS64,
     is_platform_windows,
 )
+from pandas.compat.numpy import np_version_gt2
 
 import pandas as pd
 import pandas._testing as tm
@@ -40,7 +41,7 @@
 )
 from pandas.tests.extension import base
 
-is_windows_or_32bit = is_platform_windows() or not IS64
+is_windows_or_32bit = (is_platform_windows() and not np_version_gt2) or not IS64
 
 pytestmark = [
     pytest.mark.filterwarnings(
@@ -325,7 +326,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
                 expected = pd.NA
         tm.assert_almost_equal(result, expected)
 
-    def _get_expected_reduction_dtype(self, arr, op_name: str):
+    def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
         if tm.is_float_dtype(arr.dtype):
             cmp_dtype = arr.dtype.name
         elif op_name in ["mean", "median", "var", "std", "skew"]:
@@ -335,16 +336,32 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
         elif arr.dtype in ["Int64", "UInt64"]:
             cmp_dtype = arr.dtype.name
         elif tm.is_signed_integer_dtype(arr.dtype):
-            cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
+            # TODO: Why does Window Numpy 2.0 dtype depend on skipna?
+            cmp_dtype = (
+                "Int32"
+                if (is_platform_windows() and (not np_version_gt2 or not skipna))
+                or not IS64
+                else "Int64"
+            )
         elif tm.is_unsigned_integer_dtype(arr.dtype):
-            cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64"
+            cmp_dtype = (
+                "UInt32"
+                if (is_platform_windows() and (not np_version_gt2 or not skipna))
+                or not IS64
+                else "UInt64"
+            )
         elif arr.dtype.kind == "b":
             if op_name in ["mean", "median", "var", "std", "skew"]:
                 cmp_dtype = "Float64"
             elif op_name in ["min", "max"]:
                 cmp_dtype = "boolean"
             elif op_name in ["sum", "prod"]:
-                cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
+                cmp_dtype = (
+                    "Int32"
+                    if (is_platform_windows() and (not np_version_gt2 or not skipna))
+                    or not IS64
+                    else "Int64"
+                )
             else:
                 raise TypeError("not supposed to reach this")
         else:
@@ -360,7 +377,7 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
         # overwrite to ensure pd.NA is tested instead of np.nan
         # https://github.com/pandas-dev/pandas/issues/30958
         length = 64
-        if not IS64 or is_platform_windows():
+        if is_windows_or_32bit:
             # Item "ExtensionDtype" of "Union[dtype[Any], ExtensionDtype]" has
             # no attribute "itemsize"
             if not ser.dtype.itemsize == 8:  # type: ignore[union-attr]
diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
@@ -12,6 +12,7 @@
     IS64,
     is_platform_windows,
 )
+from pandas.compat.numpy import np_version_gt2
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -131,7 +132,7 @@ class TestDataFrameSelectReindex:
     # test_indexing
 
     @pytest.mark.xfail(
-        not IS64 or is_platform_windows(),
+        not IS64 or (is_platform_windows() and not np_version_gt2),
         reason="Passes int32 values to DatetimeArray in make_na_array on "
         "windows, 32bit linux builds",
     )
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -10,6 +10,7 @@
     IS64,
     is_platform_windows,
 )
+from pandas.compat.numpy import np_version_gt2
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -32,6 +33,7 @@
     nanops,
 )
 
+is_windows_np2_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64
 is_windows_or_is32 = is_platform_windows() or not IS64
 
 
@@ -1766,13 +1768,13 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype):
     @pytest.mark.parametrize(
         "opname, dtype, exp_value, exp_dtype",
         [
-            ("sum", "Int8", 0, ("Int32" if is_windows_or_is32 else "Int64")),
-            ("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
-            ("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
+            ("sum", "Int8", 0, ("Int32" if is_windows_np2_or_is32 else "Int64")),
+            ("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
+            ("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
             ("sum", "Int64", 0, "Int64"),
             ("prod", "Int64", 1, "Int64"),
-            ("sum", "UInt8", 0, ("UInt32" if is_windows_or_is32 else "UInt64")),
-            ("prod", "UInt8", 1, ("UInt32" if is_windows_or_is32 else "UInt64")),
+            ("sum", "UInt8", 0, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
+            ("prod", "UInt8", 1, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
             ("sum", "UInt64", 0, "UInt64"),
             ("prod", "UInt64", 1, "UInt64"),
             ("sum", "Float32", 0, "Float32"),
@@ -1787,6 +1789,8 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype
         expected = Series([exp_value, exp_value], dtype=exp_dtype)
         tm.assert_series_equal(result, expected)
 
+    # TODO: why does min_count=1 impact the resulting Windows dtype
+    # differently than min_count=0?
     @pytest.mark.parametrize(
         "opname, dtype, exp_dtype",
         [

Original file line number	Diff line number	Diff line change
`@@ -12,6 +12,7 @@`
`12`	`12`	`IS64,`
`13`	`13`	`is_platform_windows,`
`14`	`14`	`)`
	`15`	`+from pandas.compat.numpy import np_version_gt2`
`15`	`16`	`import pandas.util._test_decorators as td`
`16`	`17`
`17`	`18`	`import pandas as pd`
`@@ -131,7 +132,7 @@ class TestDataFrameSelectReindex:`
`131`	`132`	`# test_indexing`
`132`	`133`
`133`	`134`	`@pytest.mark.xfail(`
`134`		`- not IS64 or is_platform_windows(),`
	`135`	`+ not IS64 or (is_platform_windows() and not np_version_gt2),`
`135`	`136`	`reason="Passes int32 values to DatetimeArray in make_na_array on "`
`136`	`137`	`"windows, 32bit linux builds",`
`137`	`138`	`)`