ENH: pd.NA comparison with time, date, timedelta (#50901)

jbrockmendel · web-flow · commit 7f2aa8f46a4a · 2023-01-25T09:20:33.000-08:00
* ENH: pd.NA comparison with time, date, timedelta

* mypy fixup

* fix on nullable dtypes
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
@@ -3,6 +3,11 @@ import numbers
 from sys import maxsize
 
 cimport cython
+from cpython.datetime cimport (
+    date,
+    time,
+    timedelta,
+)
 from cython cimport Py_ssize_t
 
 import numpy as np
@@ -307,6 +312,7 @@ def is_numeric_na(values: ndarray) -> ndarray:
 
 
 def _create_binary_propagating_op(name, is_divmod=False):
+    is_cmp = name.strip("_") in ["eq", "ne", "le", "lt", "ge", "gt"]
 
     def method(self, other):
         if (other is C_NA or isinstance(other, (str, bytes))
@@ -329,6 +335,9 @@ def _create_binary_propagating_op(name, is_divmod=False):
             else:
                 return out
 
+        elif is_cmp and isinstance(other, (date, time, timedelta)):
+            return NA
+
         return NotImplemented
 
     method.__name__ = name
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -4,6 +4,8 @@
 import numpy as np
 import pytest
 
+from pandas._typing import Dtype
+
 from pandas.core.dtypes.common import is_bool_dtype
 from pandas.core.dtypes.missing import na_value_for_dtype
 
@@ -260,6 +262,9 @@ def test_fillna_length_mismatch(self, data_missing):
         with pytest.raises(ValueError, match=msg):
             data_missing.fillna(data_missing.take([1]))
 
+    # Subclasses can override if we expect e.g. Sparse[bool], boolean, bool[pyarrow]
+    _combine_le_expected_dtype: Dtype = np.dtype(bool)
+
     def test_combine_le(self, data_repeated):
         # GH 20825
         # Test that combine works when doing a <= (le) comparison
@@ -268,13 +273,17 @@ def test_combine_le(self, data_repeated):
         s2 = pd.Series(orig_data2)
         result = s1.combine(s2, lambda x1, x2: x1 <= x2)
         expected = pd.Series(
-            [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))]
+            [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
+            dtype=self._combine_le_expected_dtype,
         )
         self.assert_series_equal(result, expected)
 
         val = s1.iloc[0]
         result = s1.combine(val, lambda x1, x2: x1 <= x2)
-        expected = pd.Series([a <= val for a in list(orig_data1)])
+        expected = pd.Series(
+            [a <= val for a in list(orig_data1)],
+            dtype=self._combine_le_expected_dtype,
+        )
         self.assert_series_equal(result, expected)
 
     def test_combine_add(self, data_repeated):
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -972,11 +972,7 @@ def test_factorize(self, data_for_grouping, request):
             )
         super().test_factorize(data_for_grouping)
 
-    @pytest.mark.xfail(
-        reason="result dtype pyarrow[bool] better than expected dtype object"
-    )
-    def test_combine_le(self, data_repeated):
-        super().test_combine_le(data_repeated)
+    _combine_le_expected_dtype = "bool[pyarrow]"
 
     def test_combine_add(self, data_repeated, request):
         pa_dtype = next(data_repeated(1)).dtype.pyarrow_dtype
diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
@@ -176,6 +176,8 @@ class TestReshaping(base.BaseReshapingTests):
 
 
 class TestMethods(base.BaseMethodsTests):
+    _combine_le_expected_dtype = "boolean"
+
     def test_factorize(self, data_for_grouping):
         # override because we only have 2 unique values
         labels, uniques = pd.factorize(data_for_grouping, use_na_sentinel=True)
@@ -185,23 +187,6 @@ def test_factorize(self, data_for_grouping):
         tm.assert_numpy_array_equal(labels, expected_labels)
         self.assert_extension_array_equal(uniques, expected_uniques)
 
-    def test_combine_le(self, data_repeated):
-        # override because expected needs to be boolean instead of bool dtype
-        orig_data1, orig_data2 = data_repeated(2)
-        s1 = pd.Series(orig_data1)
-        s2 = pd.Series(orig_data2)
-        result = s1.combine(s2, lambda x1, x2: x1 <= x2)
-        expected = pd.Series(
-            [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
-            dtype="boolean",
-        )
-        self.assert_series_equal(result, expected)
-
-        val = s1.iloc[0]
-        result = s1.combine(val, lambda x1, x2: x1 <= x2)
-        expected = pd.Series([a <= val for a in list(orig_data1)], dtype="boolean")
-        self.assert_series_equal(result, expected)
-
     def test_searchsorted(self, data_for_sorting, as_series):
         # override because we only have 2 unique values
         data_for_sorting = pd.array([True, False], dtype="boolean")
diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py
@@ -173,7 +173,7 @@ class TestMissing(base.BaseMissingTests):
 
 
 class TestMethods(base.BaseMethodsTests):
-    pass
+    _combine_le_expected_dtype = object  # TODO: can we make this boolean?
 
 
 class TestCasting(base.BaseCastingTests):
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
@@ -201,7 +201,7 @@ class TestMissing(base.BaseMissingTests):
 
 
 class TestMethods(base.BaseMethodsTests):
-    pass
+    _combine_le_expected_dtype = object  # TODO: can we make this boolean?
 
 
 class TestCasting(base.BaseCastingTests):
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
@@ -270,28 +270,7 @@ def test_fillna_frame(self, data_missing):
 
 
 class TestMethods(BaseSparseTests, base.BaseMethodsTests):
-    def test_combine_le(self, data_repeated):
-        # We return a Series[SparseArray].__le__ returns a
-        # Series[Sparse[bool]]
-        # rather than Series[bool]
-        orig_data1, orig_data2 = data_repeated(2)
-        s1 = pd.Series(orig_data1)
-        s2 = pd.Series(orig_data2)
-        result = s1.combine(s2, lambda x1, x2: x1 <= x2)
-        expected = pd.Series(
-            SparseArray(
-                [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
-                fill_value=False,
-            )
-        )
-        self.assert_series_equal(result, expected)
-
-        val = s1.iloc[0]
-        result = s1.combine(val, lambda x1, x2: x1 <= x2)
-        expected = pd.Series(
-            SparseArray([a <= val for a in list(orig_data1)], fill_value=False)
-        )
-        self.assert_series_equal(result, expected)
+    _combine_le_expected_dtype = "Sparse[bool]"
 
     def test_fillna_copy_frame(self, data_missing):
         arr = data_missing.take([1, 1])
diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py
@@ -1,3 +1,8 @@
+from datetime import (
+    date,
+    time,
+    timedelta,
+)
 import pickle
 
 import numpy as np
@@ -67,7 +72,21 @@ def test_arithmetic_ops(all_arithmetic_functions, other):
 
 
 @pytest.mark.parametrize(
-    "other", [NA, 1, 1.0, "a", b"a", np.int64(1), np.nan, np.bool_(True)]
+    "other",
+    [
+        NA,
+        1,
+        1.0,
+        "a",
+        b"a",
+        np.int64(1),
+        np.nan,
+        np.bool_(True),
+        time(0),
+        date(1, 2, 3),
+        timedelta(1),
+        pd.NaT,
+    ],
 )
 def test_comparison_ops(comparison_op, other):
     assert comparison_op(NA, other) is NA