Adjust test_binops for pandas 2.2 (#15078)

mroeschke · web-flow · commit 193ab6e877ca · 2024-02-20T13:53:46.000Z
2 tests needed to be adjusted due to pandas changes in behaviors in pandas-dev/pandas#57447 and pandas-dev/pandas#57448 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: #15078
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
@@ -567,9 +567,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
         if other is NotImplemented:
             return NotImplemented
         if isinstance(other, cudf.DateOffset):
-            return other._datetime_binop(self, op, reflect=reflect).astype(
-                self.dtype
-            )
+            return other._datetime_binop(self, op, reflect=reflect)
 
         # We check this on `other` before reflection since we already know the
         # dtype of `self`.
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
@@ -13,6 +13,7 @@
 
 import cudf
 from cudf import Series
+from cudf.core._compat import PANDAS_GE_220
 from cudf.core.buffer.spill_manager import get_global_manager
 from cudf.core.index import as_index
 from cudf.testing import _utils as utils
@@ -824,11 +825,21 @@ def test_operator_func_between_series_logical(
 @pytest.mark.parametrize("fill_value", [None, 1.0])
 @pytest.mark.parametrize("use_cudf_scalar", [False, True])
 def test_operator_func_series_and_scalar_logical(
-    dtype, func, has_nulls, scalar, fill_value, use_cudf_scalar
+    request, dtype, func, has_nulls, scalar, fill_value, use_cudf_scalar
 ):
-    gdf_series = utils.gen_rand_series(
-        dtype, 1000, has_nulls=has_nulls, stride=10000
+    request.applymarker(
+        pytest.mark.xfail(
+            PANDAS_GE_220
+            and fill_value == 1.0
+            and scalar is np.nan
+            and (has_nulls or (not has_nulls and func not in {"eq", "ne"})),
+            reason="https://github.com/pandas-dev/pandas/issues/57447",
+        )
     )
+    if has_nulls:
+        gdf_series = cudf.Series([-1.0, 0, cudf.NA, 1.1], dtype=dtype)
+    else:
+        gdf_series = cudf.Series([-1.0, 0, 10.5, 1.1], dtype=dtype)
     pdf_series = gdf_series.to_pandas(nullable=True)
     gdf_series_result = getattr(gdf_series, func)(
         cudf.Scalar(scalar) if use_cudf_scalar else scalar,
@@ -1684,16 +1695,6 @@ def test_scalar_null_binops(op, dtype_l, dtype_r):
     assert result.dtype == valid_result.dtype
 
 
-@pytest.mark.parametrize(
-    "date_col",
-    [
-        [
-            "2000-01-01 00:00:00.012345678",
-            "2000-01-31 00:00:00.012345678",
-            "2000-02-29 00:00:00.012345678",
-        ]
-    ],
-)
 @pytest.mark.parametrize("n_periods", [0, 1, -1, 12, -12])
 @pytest.mark.parametrize(
     "frequency",
@@ -1714,8 +1715,40 @@ def test_scalar_null_binops(op, dtype_l, dtype_r):
 )
 @pytest.mark.parametrize("op", [operator.add, operator.sub])
 def test_datetime_dateoffset_binaryop(
-    date_col, n_periods, frequency, dtype, op
+    request, n_periods, frequency, dtype, op
 ):
+    request.applymarker(
+        pytest.mark.xfail(
+            PANDAS_GE_220
+            and dtype in {"datetime64[ms]", "datetime64[s]"}
+            and frequency == "microseconds"
+            and n_periods == 0,
+            reason="https://github.com/pandas-dev/pandas/issues/57448",
+        )
+    )
+    request.applymarker(
+        pytest.mark.xfail(
+            not PANDAS_GE_220
+            and dtype in {"datetime64[ms]", "datetime64[s]"}
+            and frequency in ("microseconds", "nanoseconds")
+            and n_periods != 0,
+            reason="https://github.com/pandas-dev/pandas/pull/55595",
+        )
+    )
+    request.applymarker(
+        pytest.mark.xfail(
+            not PANDAS_GE_220
+            and dtype == "datetime64[us]"
+            and frequency == "nanoseconds"
+            and n_periods != 0,
+            reason="https://github.com/pandas-dev/pandas/pull/55595",
+        )
+    )
+    date_col = [
+        "2000-01-01 00:00:00.012345678",
+        "2000-01-31 00:00:00.012345678",
+        "2000-02-29 00:00:00.012345678",
+    ]
     gsr = cudf.Series(date_col, dtype=dtype)
     psr = gsr.to_pandas()
 
@@ -1776,16 +1809,6 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op):
     utils.assert_eq(expect, got)
 
 
-@pytest.mark.parametrize(
-    "date_col",
-    [
-        [
-            "2000-01-01 00:00:00.012345678",
-            "2000-01-31 00:00:00.012345678",
-            "2000-02-29 00:00:00.012345678",
-        ]
-    ],
-)
 @pytest.mark.parametrize("n_periods", [0, 1, -1, 12, -12])
 @pytest.mark.parametrize(
     "frequency",
@@ -1805,8 +1828,31 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op):
     ["datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]"],
 )
 def test_datetime_dateoffset_binaryop_reflected(
-    date_col, n_periods, frequency, dtype
+    request, n_periods, frequency, dtype
 ):
+    request.applymarker(
+        pytest.mark.xfail(
+            not PANDAS_GE_220
+            and dtype in {"datetime64[ms]", "datetime64[s]"}
+            and frequency in ("microseconds", "nanoseconds")
+            and n_periods != 0,
+            reason="https://github.com/pandas-dev/pandas/pull/55595",
+        )
+    )
+    request.applymarker(
+        pytest.mark.xfail(
+            not PANDAS_GE_220
+            and dtype == "datetime64[us]"
+            and frequency == "nanoseconds"
+            and n_periods != 0,
+            reason="https://github.com/pandas-dev/pandas/pull/55595",
+        )
+    )
+    date_col = [
+        "2000-01-01 00:00:00.012345678",
+        "2000-01-31 00:00:00.012345678",
+        "2000-02-29 00:00:00.012345678",
+    ]
     gsr = cudf.Series(date_col, dtype=dtype)
     psr = gsr.to_pandas()  # converts to nanos