From 2604eb00fea1afa476ac6f4ddadff93ad60ccec0 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 25 Sep 2020 16:59:31 -0700 Subject: [PATCH 1/2] ENH: match stdlib behavior for datetimelike comparisons --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/_libs/lib.pyx | 7 -- pandas/_libs/tslibs/timestamps.pxd | 2 +- pandas/_libs/tslibs/timestamps.pyx | 26 ++++-- pandas/core/arrays/datetimelike.py | 13 ++- pandas/tests/arithmetic/test_datetime64.py | 82 ++++++++++++------- pandas/tests/reductions/test_reductions.py | 10 +-- .../scalar/timestamp/test_comparisons.py | 27 +++--- pandas/tests/series/indexing/test_datetime.py | 2 +- 9 files changed, 103 insertions(+), 68 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 18940b574b517..ae9e741b0595d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -251,7 +251,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.slice_locs` where ``datetime.date`` objects were not accepted (:issue:`34077`) - Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with ``datetime64``, ``timedelta64`` or ``Period`` dtype placement of ``NaT`` values being inconsistent with ``NumPy`` (:issue:`36176`,:issue:`36254`) - Inconsistency in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` setitem casting arrays of strings to datetimelike scalars but not scalar strings (:issue:`36261`) -- +- :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a57cf3b523985..c6027d3fe4a1e 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -584,13 +584,6 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: elif not (PyObject_RichCompareBool(x, y, Py_EQ) or (x is None or is_nan(x)) and (y is None or is_nan(y))): return False - except TypeError as err: - # Avoid raising TypeError on tzawareness mismatch - # TODO: This try/except can be removed if/when Timestamp - # comparisons are changed to match datetime, see GH#28507 - if "tz-naive and tz-aware" in str(err): - return False - raise except ValueError: # Avoid raising ValueError when comparing Numpy arrays to other types if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 307b6dfc90715..6fb7b1ea8f520 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -19,8 +19,8 @@ cdef class _Timestamp(ABCTimestamp): cdef bint _get_start_end_field(self, str field) cdef _get_date_name_field(self, str field, object locale) cdef int64_t _maybe_convert_value_to_local(self) + cdef bint _can_compare(self, datetime other) cpdef to_datetime64(self) - cdef _assert_tzawareness_compat(_Timestamp self, datetime other) cpdef datetime to_pydatetime(_Timestamp self, bint warn=*) cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, int op) except -1 diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index a01ef98b83693..d2e14722d744f 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -260,6 +260,10 @@ cdef class _Timestamp(ABCTimestamp): if other.dtype.kind == "M": if self.tz is None: return PyObject_RichCompare(self.asm8, other, op) + elif op == Py_NE: + return np.ones(other.shape, dtype=np.bool_) + elif op == Py_EQ: + return np.zeros(other.shape, dtype=np.bool_) raise TypeError( "Cannot compare tz-naive and tz-aware timestamps" ) @@ -278,7 +282,12 @@ cdef class _Timestamp(ABCTimestamp): else: return NotImplemented - self._assert_tzawareness_compat(ots) + if not self._can_compare(ots): + if op == Py_NE or op == Py_EQ: + return NotImplemented + raise TypeError( + "Cannot compare tz-naive and tz-aware timestamps" + ) return cmp_scalar(self.value, ots.value, op) cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, @@ -286,16 +295,15 @@ cdef class _Timestamp(ABCTimestamp): cdef: datetime dtval = self.to_pydatetime() - self._assert_tzawareness_compat(other) + if not self._can_compare(other): + return NotImplemented + return PyObject_RichCompareBool(dtval, other, op) - cdef _assert_tzawareness_compat(_Timestamp self, datetime other): - if self.tzinfo is None: - if other.tzinfo is not None: - raise TypeError('Cannot compare tz-naive and tz-aware ' - 'timestamps') - elif other.tzinfo is None: - raise TypeError('Cannot compare tz-naive and tz-aware timestamps') + cdef bint _can_compare(self, datetime other): + if self.tzinfo is not None: + return other.tzinfo is not None + return other.tzinfo is None def __add__(self, other): cdef: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 026aad5ad6eb7..61760bbd1cc67 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -686,7 +686,11 @@ def _validate_comparison_value(self, other, opname: str): if isinstance(other, self._recognized_scalars) or other is NaT: other = self._scalar_type(other) # type: ignore[call-arg] - self._check_compatible_with(other) + try: + self._check_compatible_with(other) + except TypeError as err: + # e.g. tzawareness mismatch + raise InvalidComparison(other) from err elif not is_list_like(other): raise InvalidComparison(other) @@ -697,8 +701,13 @@ def _validate_comparison_value(self, other, opname: str): else: try: other = self._validate_listlike(other, opname, allow_object=True) + self._check_compatible_with(other) except TypeError as err: - raise InvalidComparison(other) from err + if is_object_dtype(getattr(other, "dtype", None)): + # We will have to operate element-wise + pass + else: + raise InvalidComparison(other) from err return other diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 0dd389ed516c7..05334b2319c8c 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -537,26 +537,30 @@ def test_comparison_tzawareness_compat(self, op, box_with_array): dr = tm.box_expected(dr, box) dz = tm.box_expected(dz, box) - msg = "Cannot compare tz-naive and tz-aware" - with pytest.raises(TypeError, match=msg): - op(dr, dz) - if box is pd.DataFrame: tolist = lambda x: x.astype(object).values.tolist()[0] else: tolist = list - with pytest.raises(TypeError, match=msg): - op(dr, tolist(dz)) - with pytest.raises(TypeError, match=msg): - op(dr, np.array(tolist(dz), dtype=object)) - with pytest.raises(TypeError, match=msg): - op(dz, dr) + if op not in [operator.eq, operator.ne]: + msg = ( + r"Invalid comparison between dtype=datetime64\[ns.*\] " + "and (Timestamp|DatetimeArray|list|ndarray)" + ) + with pytest.raises(TypeError, match=msg): + op(dr, dz) - with pytest.raises(TypeError, match=msg): - op(dz, tolist(dr)) - with pytest.raises(TypeError, match=msg): - op(dz, np.array(tolist(dr), dtype=object)) + with pytest.raises(TypeError, match=msg): + op(dr, tolist(dz)) + with pytest.raises(TypeError, match=msg): + op(dr, np.array(tolist(dz), dtype=object)) + with pytest.raises(TypeError, match=msg): + op(dz, dr) + + with pytest.raises(TypeError, match=msg): + op(dz, tolist(dr)) + with pytest.raises(TypeError, match=msg): + op(dz, np.array(tolist(dr), dtype=object)) # The aware==aware and naive==naive comparisons should *not* raise assert np.all(dr == dr) @@ -588,17 +592,20 @@ def test_comparison_tzawareness_compat_scalars(self, op, box_with_array): ts_tz = pd.Timestamp("2000-03-14 01:59", tz="Europe/Amsterdam") assert np.all(dr > ts) - msg = "Cannot compare tz-naive and tz-aware" - with pytest.raises(TypeError, match=msg): - op(dr, ts_tz) + msg = r"Invalid comparison between dtype=datetime64\[ns.*\] and Timestamp" + if op not in [operator.eq, operator.ne]: + with pytest.raises(TypeError, match=msg): + op(dr, ts_tz) assert np.all(dz > ts_tz) - with pytest.raises(TypeError, match=msg): - op(dz, ts) + if op not in [operator.eq, operator.ne]: + with pytest.raises(TypeError, match=msg): + op(dz, ts) - # GH#12601: Check comparison against Timestamps and DatetimeIndex - with pytest.raises(TypeError, match=msg): - op(ts, dz) + if op not in [operator.eq, operator.ne]: + # GH#12601: Check comparison against Timestamps and DatetimeIndex + with pytest.raises(TypeError, match=msg): + op(ts, dz) @pytest.mark.parametrize( "op", @@ -618,13 +625,28 @@ def test_scalar_comparison_tzawareness( ): tz = tz_aware_fixture dti = pd.date_range("2016-01-01", periods=2, tz=tz) + xbox = box_with_array if box_with_array is not pd.Index else np.ndarray dtarr = tm.box_expected(dti, box_with_array) - msg = "Cannot compare tz-naive and tz-aware" - with pytest.raises(TypeError, match=msg): - op(dtarr, other) - with pytest.raises(TypeError, match=msg): - op(other, dtarr) + if op in [operator.eq, operator.ne]: + exbool = op is operator.ne + expected = np.array([exbool, exbool], dtype=bool) + expected = tm.box_expected(expected, xbox) + + result = op(dtarr, other) + tm.assert_equal(result, expected) + + result = op(other, dtarr) + tm.assert_equal(result, expected) + else: + msg = ( + r"Invalid comparison between dtype=datetime64\[ns, .*\] " + f"and {type(other).__name__}" + ) + with pytest.raises(TypeError, match=msg): + op(dtarr, other) + with pytest.raises(TypeError, match=msg): + op(other, dtarr) @pytest.mark.parametrize( "op", @@ -724,10 +746,8 @@ def test_dti_cmp_object_dtype(self): tm.assert_numpy_array_equal(result, expected) other = dti.tz_localize(None) - msg = "Cannot compare tz-naive and tz-aware" - with pytest.raises(TypeError, match=msg): - # tzawareness failure - dti != other + result = dti != other + tm.assert_numpy_array_equal(result, expected) other = np.array(list(dti[:5]) + [Timedelta(days=1)] * 5) result = dti == other diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index bbf2d9f1f0784..230f45be8d06f 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -56,13 +56,13 @@ def test_ops(self, opname, obj): expected = getattr(obj.values, opname)() else: expected = pd.Period(ordinal=getattr(obj.asi8, opname)(), freq=obj.freq) - try: - assert result == expected - except TypeError: - # comparing tz-aware series with np.array results in - # TypeError + + if getattr(obj, "tz", None) is not None: + # We need to de-localize before comparing to the numpy-produced result expected = expected.astype("M8[ns]").astype("int64") assert result.value == expected + else: + assert result == expected @pytest.mark.parametrize("opname", ["max", "min"]) @pytest.mark.parametrize( diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py index 71693a9ca61ce..3d1f71def5836 100644 --- a/pandas/tests/scalar/timestamp/test_comparisons.py +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -56,9 +56,18 @@ def test_comparison_dt64_ndarray_tzaware(self, reverse, all_compare_operators): if reverse: left, right = arr, ts - msg = "Cannot compare tz-naive and tz-aware timestamps" - with pytest.raises(TypeError, match=msg): - op(left, right) + if op is operator.eq: + expected = np.array([False, False], dtype=bool) + result = op(left, right) + tm.assert_numpy_array_equal(result, expected) + elif op is operator.ne: + expected = np.array([True, True], dtype=bool) + result = op(left, right) + tm.assert_numpy_array_equal(result, expected) + else: + msg = "Cannot compare tz-naive and tz-aware timestamps" + with pytest.raises(TypeError, match=msg): + op(left, right) def test_comparison_object_array(self): # GH#15183 @@ -139,10 +148,8 @@ def test_cant_compare_tz_naive_w_aware(self, utc_fixture): b = Timestamp("3/12/2012", tz=utc_fixture) msg = "Cannot compare tz-naive and tz-aware timestamps" - with pytest.raises(TypeError, match=msg): - a == b - with pytest.raises(TypeError, match=msg): - a != b + assert not a == b + assert a != b with pytest.raises(TypeError, match=msg): a < b with pytest.raises(TypeError, match=msg): @@ -152,10 +159,8 @@ def test_cant_compare_tz_naive_w_aware(self, utc_fixture): with pytest.raises(TypeError, match=msg): a >= b - with pytest.raises(TypeError, match=msg): - b == a - with pytest.raises(TypeError, match=msg): - b != a + assert not b == a + assert b != a with pytest.raises(TypeError, match=msg): b < a with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 088f8681feb99..8f12888c89164 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -259,7 +259,7 @@ def test_getitem_setitem_datetimeindex(): lb = datetime(1990, 1, 1, 4) rb = datetime(1990, 1, 1, 7) - msg = "Cannot compare tz-naive and tz-aware datetime-like objects" + msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime" with pytest.raises(TypeError, match=msg): # tznaive vs tzaware comparison is invalid # see GH#18376, GH#18162 From a762875dee7fb5980725b43f316e59f44b71c527 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 2 Oct 2020 18:50:17 -0700 Subject: [PATCH 2/2] update test --- pandas/tests/arithmetic/test_datetime64.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 74b38d634e652..d05df485f662e 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -635,9 +635,10 @@ def test_comparison_tzawareness_compat_scalars(self, op, box_with_array): def test_scalar_comparison_tzawareness( self, op, other, tz_aware_fixture, box_with_array ): + box = box_with_array tz = tz_aware_fixture dti = pd.date_range("2016-01-01", periods=2, tz=tz) - xbox = box_with_array if box_with_array is not pd.Index else np.ndarray + xbox = box if box not in [pd.Index, pd.array] else np.ndarray dtarr = tm.box_expected(dti, box_with_array) if op in [operator.eq, operator.ne]: