Skip to content

WIP: Timestamp/DTA match stdlib tzawareness-compat behavior #33415

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/c_timestamp.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ cdef class _Timestamp(datetime):
cpdef _get_date_name_field(self, object field, object locale)
cdef int64_t _maybe_convert_value_to_local(self)
cpdef to_datetime64(self)
cdef _assert_tzawareness_compat(_Timestamp self, datetime other)
cdef _assert_tzawareness_compat(_Timestamp self, datetime other, int op)
cpdef datetime to_pydatetime(_Timestamp self, bint warn=*)
cdef bint _compare_outside_nanorange(_Timestamp self, datetime other,
int op) except -1
14 changes: 11 additions & 3 deletions pandas/_libs/tslibs/c_timestamp.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ cdef class _Timestamp(datetime):
else:
return NotImplemented

self._assert_tzawareness_compat(other)
if self._assert_tzawareness_compat(other, op) is False:
return op == Py_NE
return cmp_scalar(self.value, ots.value, op)

def __reduce_ex__(self, protocol):
Expand Down Expand Up @@ -170,7 +171,8 @@ cdef class _Timestamp(datetime):
cdef:
datetime dtval = self.to_pydatetime()

self._assert_tzawareness_compat(other)
if self._assert_tzawareness_compat(other, op) is False:
return op == Py_NE

if self.nanosecond == 0:
return PyObject_RichCompareBool(dtval, other, op)
Expand All @@ -188,12 +190,18 @@ cdef class _Timestamp(datetime):
elif op == Py_GE:
return dtval >= other

cdef _assert_tzawareness_compat(_Timestamp self, datetime other):
cdef _assert_tzawareness_compat(_Timestamp self, datetime other, int op):
is_inequality = not (op == Py_EQ or op == Py_NE)

if self.tzinfo is None:
if other.tzinfo is not None:
if not is_inequality:
return False
raise TypeError('Cannot compare tz-naive and tz-aware '
'timestamps')
elif other.tzinfo is None:
if not is_inequality:
return False
raise TypeError('Cannot compare tz-naive and tz-aware timestamps')

cpdef datetime to_pydatetime(_Timestamp self, bint warn=True):
Expand Down
18 changes: 14 additions & 4 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ def wrapper(self, other):

if isinstance(other, self._recognized_scalars) or other is NaT:
other = self._scalar_type(other)
self._check_compatible_with(other)
if self._check_compatible_with(other, op=op) is False:
if op is operator.ne:
return np.ones(self.shape, dtype=bool)
return np.zeros(self.shape, dtype=bool)

other_i8 = self._unbox_scalar(other)

Expand Down Expand Up @@ -111,7 +114,10 @@ def wrapper(self, other):
else:
# For PeriodDType this casting is unnecessary
other = type(self)._from_sequence(other)
self._check_compatible_with(other)
if self._check_compatible_with(other, op=op) is False:
if op is operator.eq:
return np.zeros(self.shape, dtype=bool)
return np.ones(self.shape, dtype=bool)

result = op(self.view("i8"), other.view("i8"))
o_mask = other._isnan
Expand Down Expand Up @@ -187,7 +193,10 @@ def _unbox_scalar(self, value: Union[Period, Timestamp, Timedelta, NaTType]) ->
raise AbstractMethodError(self)

def _check_compatible_with(
self, other: Union[Period, Timestamp, Timedelta, NaTType], setitem: bool = False
self,
other: Union[Period, Timestamp, Timedelta, NaTType],
setitem: bool = False,
op=None,
) -> None:
"""
Verify that `self` and `other` are compatible.
Expand All @@ -204,6 +213,7 @@ def _check_compatible_with(
setitem : bool, default False
For __setitem__ we may have stricter compatibility restrictions than
for comparisons.
op : None or an operator.{eq,ne,lt,le,gt,ge} function

Raises
------
Expand Down Expand Up @@ -853,7 +863,7 @@ def searchsorted(self, value, side="left", sorter=None):
raise TypeError(f"Unexpected type for 'value': {type(value)}")

if isinstance(value, type(self)):
self._check_compatible_with(value)
self._check_compatible_with(value) # TODO: this is operator.lt?
value = value.asi8
else:
value = self._unbox_scalar(value)
Expand Down
30 changes: 17 additions & 13 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime, time, timedelta
import operator
from typing import Union
import warnings

Expand Down Expand Up @@ -455,10 +456,11 @@ def _unbox_scalar(self, value):
def _scalar_from_string(self, value):
return Timestamp(value, tz=self.tz)

def _check_compatible_with(self, other, setitem: bool = False):
def _check_compatible_with(self, other, setitem: bool = False, op=None):
if other is NaT:
return
self._assert_tzawareness_compat(other)
if self._assert_tzawareness_compat(other, op=op) is False:
return False
if setitem:
# Stricter check for setitem vs comparison methods
if not timezones.tz_compare(self.tz, other.tz):
Expand Down Expand Up @@ -627,26 +629,28 @@ def _has_same_tz(self, other):
# convert to Timestamp as np.datetime64 doesn't have tz attr
other = Timestamp(other)
vzone = timezones.get_timezone(getattr(other, "tzinfo", "__no_tz__"))
return zzone == vzone
return zzone == vzone # TODO: use tz_compare?

def _assert_tzawareness_compat(self, other):
def _assert_tzawareness_compat(self, other, op=None):
# adapted from _Timestamp._assert_tzawareness_compat

# GH#33415 match stdlib behavior
is_inequality = not (op is operator.eq or op is operator.ne)

msg = "Cannot compare tz-naive and tz-aware datetime-like objects."
other_tz = getattr(other, "tzinfo", None)
if is_datetime64tz_dtype(other):
# Get tzinfo from Series dtype
other_tz = other.dtype.tz
if other is NaT:
# pd.NaT quacks both aware and naive
pass
elif self.tz is None:
if other_tz is not None:
raise TypeError(
"Cannot compare tz-naive and tz-aware datetime-like objects."
)
elif other_tz is None:
raise TypeError(
"Cannot compare tz-naive and tz-aware datetime-like objects"
)
elif int(self.tz is None) + int(other_tz is None) == 1:
# i.e. one but not the other
if is_inequality:
raise TypeError(msg)
return False
return None

# -----------------------------------------------------------------
# Arithmetic Methods
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,10 +258,10 @@ def _unbox_scalar(self, value: Union[Period, NaTType]) -> int:
def _scalar_from_string(self, value: str) -> Period:
return Period(value, freq=self.freq)

def _check_compatible_with(self, other, setitem: bool = False):
def _check_compatible_with(self, other, setitem: bool = False, op=None):
if other is NaT:
return
if self.freqstr != other.freqstr:
if self.freqstr != other.freqstr: # TODO: don't raise for eq/ne comparison?
raise raise_on_incompatible(self, other)

# --------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def _unbox_scalar(self, value):
def _scalar_from_string(self, value):
return Timedelta(value)

def _check_compatible_with(self, other, setitem: bool = False):
def _check_compatible_with(self, other, setitem: bool = False, op=None):
# we don't have anything to validate.
pass

Expand Down
105 changes: 70 additions & 35 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,25 +539,43 @@ def test_comparison_tzawareness_compat(self, op, box_df_fail):
dr = tm.box_expected(dr, box)
dz = tm.box_expected(dz, box)

msg = "Cannot compare tz-naive and tz-aware"
with pytest.raises(TypeError, match=msg):
op(dr, dz)
if op not in [operator.eq, operator.ne]:
msg = "Cannot compare tz-naive and tz-aware"
with pytest.raises(TypeError, match=msg):
op(dr, dz)

# FIXME: DataFrame case fails to raise for == and !=, wrong
# message for inequalities
with pytest.raises(TypeError, match=msg):
op(dr, list(dz))
with pytest.raises(TypeError, match=msg):
op(dr, np.array(list(dz), dtype=object))
with pytest.raises(TypeError, match=msg):
op(dz, dr)
# FIXME: DataFrame case fails to raise for == and !=, wrong
# message for inequalities
with pytest.raises(TypeError, match=msg):
op(dr, list(dz))
with pytest.raises(TypeError, match=msg):
op(dr, np.array(list(dz), dtype=object))
with pytest.raises(TypeError, match=msg):
op(dz, dr)

# FIXME: DataFrame case fails to raise for == and !=, wrong
# message for inequalities
with pytest.raises(TypeError, match=msg):
op(dz, list(dr))
with pytest.raises(TypeError, match=msg):
op(dz, np.array(list(dr), dtype=object))
# FIXME: DataFrame case fails to raise for == and !=, wrong
# message for inequalities
with pytest.raises(TypeError, match=msg):
op(dz, list(dr))
with pytest.raises(TypeError, match=msg):
op(dz, np.array(list(dr), dtype=object))

elif op is operator.eq:

assert not np.any(op(dr, dz))
assert not np.any(op(dr, list(dz)))
assert not np.any(op(dr, np.array(list(dz), dtype=object)))
assert not np.any(op(dz, dr))
assert not np.any(op(dz, list(dr)))
assert not np.any(op(dz, np.array(list(dr), dtype=object)))

else:
assert np.all(op(dr, dz))
assert np.all(op(dr, list(dz)))
assert np.all(op(dr, np.array(list(dz), dtype=object)))
assert np.all(op(dz, dr))
assert np.all(op(dz, list(dr)))
assert np.all(op(dz, np.array(list(dr), dtype=object)))

# The aware==aware and naive==naive comparisons should *not* raise
assert np.all(dr == dr)
Expand Down Expand Up @@ -589,17 +607,27 @@ def test_comparison_tzawareness_compat_scalars(self, op, box_with_array):
ts_tz = pd.Timestamp("2000-03-14 01:59", tz="Europe/Amsterdam")

assert np.all(dr > ts)
msg = "Cannot compare tz-naive and tz-aware"
with pytest.raises(TypeError, match=msg):
op(dr, ts_tz)

assert np.all(dz > ts_tz)
with pytest.raises(TypeError, match=msg):
op(dz, ts)
if op not in [operator.eq, operator.ne]:
msg = "Cannot compare tz-naive and tz-aware"
with pytest.raises(TypeError, match=msg):
op(dr, ts_tz)

# GH#12601: Check comparison against Timestamps and DatetimeIndex
with pytest.raises(TypeError, match=msg):
op(ts, dz)
assert np.all(dz > ts_tz)
with pytest.raises(TypeError, match=msg):
op(dz, ts)

# GH#12601: Check comparison against Timestamps and DatetimeIndex
with pytest.raises(TypeError, match=msg):
op(ts, dz)
elif op is operator.eq:
assert not np.any(op(dr, ts_tz))
assert not np.any(op(dz, ts))
assert not np.any(op(ts, dz))
else:
assert np.all(op(dr, ts_tz))
assert np.all(op(dz, ts))
assert np.all(op(ts, dz))

@pytest.mark.parametrize(
"op",
Expand All @@ -621,11 +649,19 @@ def test_scalar_comparison_tzawareness(
dti = pd.date_range("2016-01-01", periods=2, tz=tz)

dtarr = tm.box_expected(dti, box_with_array)
msg = "Cannot compare tz-naive and tz-aware"
with pytest.raises(TypeError, match=msg):
op(dtarr, other)
with pytest.raises(TypeError, match=msg):
op(other, dtarr)
if op not in [operator.eq, operator.ne]:
msg = "Cannot compare tz-naive and tz-aware"
with pytest.raises(TypeError, match=msg):
op(dtarr, other)
with pytest.raises(TypeError, match=msg):
op(other, dtarr)
elif op is operator.eq:

assert not np.any(op(dtarr, other))
assert not np.any(op(other, dtarr))
else:
assert np.all(op(dtarr, other))
assert np.all(op(other, dtarr))

@pytest.mark.parametrize(
"op",
Expand Down Expand Up @@ -725,10 +761,9 @@ def test_dti_cmp_object_dtype(self):
tm.assert_numpy_array_equal(result, expected)

other = dti.tz_localize(None)
msg = "Cannot compare tz-naive and tz-aware"
with pytest.raises(TypeError, match=msg):
# tzawareness failure
dti != other
expected = np.ones(dti.shape, dtype=bool)
result = dti != other
tm.assert_numpy_array_equal(result, expected)

other = np.array(list(dti[:5]) + [Timedelta(days=1)] * 5)
result = dti == other
Expand Down
14 changes: 6 additions & 8 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,15 @@ class TestReductions:
@pytest.mark.parametrize("obj", objs)
def test_ops(self, opname, obj):
result = getattr(obj, opname)()
if not isinstance(obj, PeriodIndex):
if isinstance(obj, DatetimeIndex):
expected = getattr(obj.values, opname)()
expected = obj._box_func(expected)
elif not isinstance(obj, PeriodIndex):
expected = getattr(obj.values, opname)()
else:
expected = pd.Period(ordinal=getattr(obj.asi8, opname)(), freq=obj.freq)
try:
assert result == expected
except TypeError:
# comparing tz-aware series with np.array results in
# TypeError
expected = expected.astype("M8[ns]").astype("int64")
assert result.value == expected

assert result == expected

@pytest.mark.parametrize("opname", ["max", "min"])
@pytest.mark.parametrize(
Expand Down
12 changes: 4 additions & 8 deletions pandas/tests/scalar/timestamp/test_comparisons.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,9 @@ def test_cant_compare_tz_naive_w_aware(self, utc_fixture):
a = Timestamp("3/12/2012")
b = Timestamp("3/12/2012", tz=utc_fixture)

assert not a == b
assert a != b
msg = "Cannot compare tz-naive and tz-aware timestamps"
with pytest.raises(TypeError, match=msg):
a == b
with pytest.raises(TypeError, match=msg):
a != b
with pytest.raises(TypeError, match=msg):
a < b
with pytest.raises(TypeError, match=msg):
Expand All @@ -152,10 +150,8 @@ def test_cant_compare_tz_naive_w_aware(self, utc_fixture):
with pytest.raises(TypeError, match=msg):
a >= b

with pytest.raises(TypeError, match=msg):
b == a
with pytest.raises(TypeError, match=msg):
b != a
assert not b == a
assert b != a
with pytest.raises(TypeError, match=msg):
b < a
with pytest.raises(TypeError, match=msg):
Expand Down