Skip to content

BUG/TST: Fix TimedeltaIndex comparisons with invalid types #24056

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 5, 2018
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1307,6 +1307,7 @@ Timedelta
- Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-`NaT` :class:`DatetimeIndex` instead of an all-`NaT` :class:`TimedeltaIndex` (:issue:`23215`)
- Bug in :class:`Timedelta` and :func:`to_timedelta()` where the supported unit strings were inconsistent (:issue:`21762`)
- Bug in :class:`TimedeltaIndex` division where dividing by another :class:`TimedeltaIndex` raised ``TypeError`` instead of returning a :class:`Float64Index` (:issue:`23829`, :issue:`22631`)
- Bug in :class:`TimedeltaIndex` comparison operations where comparing against non-``Timedelta``-like objects would raise ``TypeError`` instead of returning all-``False`` for ``__eq__`` and all-``True`` for ``__ne__`` (:issue:`24056`)

Timezones
^^^^^^^^^
Expand Down
9 changes: 9 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
# by returning NotImplemented
timetuple = None

# Needed so that Timestamp.__richcmp__(DatetimeArray) operates pointwise
ndim = 1

# ensure that operations with numpy arrays defer to our implementation
__array_priority__ = 1000

Expand Down Expand Up @@ -217,6 +220,12 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if not hasattr(values, "dtype"):
if np.ndim(values) == 0:
# i.e. iterator
values = list(values)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you have a test hit this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

values = np.array(values)

if is_object_dtype(values):
# kludge; dispatch until the DatetimeArray constructor is complete
from pandas import DatetimeIndex
Expand Down
21 changes: 14 additions & 7 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,25 +72,29 @@ def _td_array_cmp(cls, op):
opname = '__{name}__'.format(name=op.__name__)
nat_result = True if opname == '__ne__' else False

meth = getattr(dtl.DatetimeLikeArrayMixin, opname)

def wrapper(self, other):
msg = "cannot compare a {cls} with type {typ}"
meth = getattr(dtl.DatetimeLikeArrayMixin, opname)
if _is_convertible_to_td(other) or other is NaT:
try:
other = _to_m8(other)
except ValueError:
# failed to parse as timedelta
raise TypeError(msg.format(cls=type(self).__name__,
typ=type(other).__name__))
return ops.invalid_comparison(self, other, op)

result = meth(self, other)
if isna(other):
result.fill(nat_result)

elif not is_list_like(other):
raise TypeError(msg.format(cls=type(self).__name__,
typ=type(other).__name__))
return ops.invalid_comparison(self, other, op)

else:
other = type(self)(other)._data
try:
other = type(self)(other)._data
except (ValueError, TypeError):
return ops.invalid_comparison(self, other, op)

result = meth(self, other)
result = com.values_from_object(result)

Expand Down Expand Up @@ -133,6 +137,9 @@ class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
_typ = "timedeltaarray"
__array_priority__ = 1000

# Needed so that NaT.__richcmp__(TimedeltaArray) operates pointwise
ndim = 1

@property
def _box_func(self):
return lambda x: Timedelta(x, unit='ns')
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ class NDFrame(PandasObject, SelectionMixin):
_metadata = []
_is_copy = None

# dummy attribute so that datetime.__eq__(Series/DataFrame) defers
# by returning NotImplemented
timetuple = None

# ----------------------------------------------------------------------
# Constructors

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,7 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None):
import pandas.core.computation.expressions as expressions

right = lib.item_from_zerodim(right)
if lib.is_scalar(right):
if lib.is_scalar(right) or np.ndim(right) == 0:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this needed? this should already be handled by is_scalar

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because there are lots of weird things a user could pass that are zero-dim but not caught by is_scalar. The test case that hits this just passes object()


def column_op(a, b):
return {i: func(a.iloc[:, i], b)
Expand Down
130 changes: 89 additions & 41 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,17 @@
DatetimeIndex, TimedeltaIndex)


def assert_all(obj):
    """
    Test helper asserting that every element of ``obj`` is truthy.

    ``.all()`` is called once on ``obj``; when ``obj`` is a DataFrame the
    intermediate result is reduced with a second ``.all()`` call so that a
    single boolean is asserted.

    Parameters
    ----------
    obj : object exposing ``.all()``
        A DataFrame gets the double reduction; anything else (e.g. a Series)
        gets a single one.

    Raises
    ------
    AssertionError
        If the fully reduced value is falsy.
    """
    reduced = obj.all()
    if isinstance(obj, pd.DataFrame):
        # The first reduction of a DataFrame is not yet a scalar; reduce again.
        reduced = reduced.all()
    assert reduced


# ------------------------------------------------------------------
# Comparisons

Expand Down Expand Up @@ -86,11 +97,11 @@ def test_comparison_invalid(self, box_with_array):
[Period('2011-01', freq='M'), NaT, Period('2011-03', freq='M')]
])
@pytest.mark.parametrize('dtype', [None, object])
def test_nat_comparisons_scalar(self, dtype, data, box):
xbox = box if box is not pd.Index else np.ndarray
def test_nat_comparisons_scalar(self, dtype, data, box_with_array):
xbox = box_with_array if box_with_array is not pd.Index else np.ndarray

left = Series(data, dtype=dtype)
left = tm.box_expected(left, box)
left = tm.box_expected(left, box_with_array)

expected = [False, False, False]
expected = tm.box_expected(expected, xbox)
Expand Down Expand Up @@ -290,23 +301,24 @@ def test_dti_cmp_datetimelike(self, other, tz_naive_fixture):
expected = np.array([True, False])
tm.assert_numpy_array_equal(result, expected)

def dti_cmp_non_datetime(self, tz_naive_fixture):
def dt64arr_cmp_non_datetime(self, tz_naive_fixture, box_with_array):
# GH#19301 by convention datetime.date is not considered comparable
# to Timestamp or DatetimeIndex. This may change in the future.
tz = tz_naive_fixture
dti = pd.date_range('2016-01-01', periods=2, tz=tz)
dtarr = tm.box_expected(dti, box_with_array)

other = datetime(2016, 1, 1).date()
assert not (dti == other).any()
assert (dti != other).all()
assert not (dtarr == other).any()
assert (dtarr != other).all()
with pytest.raises(TypeError):
dti < other
dtarr < other
with pytest.raises(TypeError):
dti <= other
dtarr <= other
with pytest.raises(TypeError):
dti > other
dtarr > other
with pytest.raises(TypeError):
dti >= other
dtarr >= other

@pytest.mark.parametrize('other', [None, np.nan, pd.NaT])
def test_dti_eq_null_scalar(self, other, tz_naive_fixture):
Expand All @@ -323,49 +335,62 @@ def test_dti_ne_null_scalar(self, other, tz_naive_fixture):
assert (dti != other).all()

@pytest.mark.parametrize('other', [None, np.nan])
def test_dti_cmp_null_scalar_inequality(self, tz_naive_fixture, other):
def test_dti_cmp_null_scalar_inequality(self, tz_naive_fixture, other,
box_with_array):
# GH#19301
tz = tz_naive_fixture
dti = pd.date_range('2016-01-01', periods=2, tz=tz)
# FIXME: ValueError with transpose
dtarr = tm.box_expected(dti, box_with_array, transpose=False)

with pytest.raises(TypeError):
dti < other
dtarr < other
with pytest.raises(TypeError):
dti <= other
dtarr <= other
with pytest.raises(TypeError):
dti > other
dtarr > other
with pytest.raises(TypeError):
dti >= other
dtarr >= other

@pytest.mark.parametrize('dtype', [None, object])
def test_dti_cmp_nat(self, dtype):
def test_dti_cmp_nat(self, dtype, box_with_array):
xbox = box_with_array if box_with_array is not pd.Index else np.ndarray

left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT,
pd.Timestamp('2011-01-03')])
right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')])

left = tm.box_expected(left, box_with_array)
right = tm.box_expected(right, box_with_array)

lhs, rhs = left, right
if dtype is object:
lhs, rhs = left.astype(object), right.astype(object)

result = rhs == lhs
expected = np.array([False, False, True])
tm.assert_numpy_array_equal(result, expected)
expected = tm.box_expected(expected, xbox)
tm.assert_equal(result, expected)

result = lhs != rhs
expected = np.array([True, True, False])
tm.assert_numpy_array_equal(result, expected)
expected = tm.box_expected(expected, xbox)
tm.assert_equal(result, expected)

expected = np.array([False, False, False])
tm.assert_numpy_array_equal(lhs == pd.NaT, expected)
tm.assert_numpy_array_equal(pd.NaT == rhs, expected)
expected = tm.box_expected(expected, xbox)
tm.assert_equal(lhs == pd.NaT, expected)
tm.assert_equal(pd.NaT == rhs, expected)

expected = np.array([True, True, True])
tm.assert_numpy_array_equal(lhs != pd.NaT, expected)
tm.assert_numpy_array_equal(pd.NaT != lhs, expected)
expected = tm.box_expected(expected, xbox)
tm.assert_equal(lhs != pd.NaT, expected)
tm.assert_equal(pd.NaT != lhs, expected)

expected = np.array([False, False, False])
tm.assert_numpy_array_equal(lhs < pd.NaT, expected)
tm.assert_numpy_array_equal(pd.NaT > lhs, expected)
expected = tm.box_expected(expected, xbox)
tm.assert_equal(lhs < pd.NaT, expected)
tm.assert_equal(pd.NaT > lhs, expected)

def test_dti_cmp_nat_behaves_like_float_cmp_nan(self):
fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0])
Expand Down Expand Up @@ -459,36 +484,47 @@ def test_dti_cmp_nat_behaves_like_float_cmp_nan(self):
@pytest.mark.parametrize('op', [operator.eq, operator.ne,
operator.gt, operator.ge,
operator.lt, operator.le])
def test_comparison_tzawareness_compat(self, op):
def test_comparison_tzawareness_compat(self, op, box_with_array):
# GH#18162
dr = pd.date_range('2016-01-01', periods=6)
dz = dr.tz_localize('US/Pacific')

# FIXME: ValueError with transpose
dr = tm.box_expected(dr, box_with_array, transpose=False)
dz = tm.box_expected(dz, box_with_array, transpose=False)

with pytest.raises(TypeError):
op(dr, dz)
with pytest.raises(TypeError):
op(dr, list(dz))
if box_with_array is not pd.DataFrame:
# DataFrame op is invalid until transpose bug is fixed
with pytest.raises(TypeError):
op(dr, list(dz))
with pytest.raises(TypeError):
op(dz, dr)
with pytest.raises(TypeError):
op(dz, list(dr))
if box_with_array is not pd.DataFrame:
# DataFrame op is invalid until transpose bug is fixed
with pytest.raises(TypeError):
op(dz, list(dr))

# Check that aware-aware and naive-naive comparisons do not raise
assert (dr == dr).all()
assert (dr == list(dr)).all()
assert (dz == dz).all()
assert (dz == list(dz)).all()
assert_all(dr == dr)
assert_all(dz == dz)
if box_with_array is not pd.DataFrame:
# DataFrame doesn't align the lists correctly unless we transpose,
# which we cannot do at the moment
assert (dr == list(dr)).all()
assert (dz == list(dz)).all()

# Check comparisons against scalar Timestamps
ts = pd.Timestamp('2000-03-14 01:59')
ts_tz = pd.Timestamp('2000-03-14 01:59', tz='Europe/Amsterdam')

assert (dr > ts).all()
assert_all(dr > ts)
with pytest.raises(TypeError):
op(dr, ts_tz)

assert (dz > ts_tz).all()
assert_all(dz > ts_tz)
with pytest.raises(TypeError):
op(dz, ts)

Expand All @@ -502,13 +538,18 @@ def test_comparison_tzawareness_compat(self, op):
@pytest.mark.parametrize('other', [datetime(2016, 1, 1),
Timestamp('2016-01-01'),
np.datetime64('2016-01-01')])
def test_scalar_comparison_tzawareness(self, op, other, tz_aware_fixture):
def test_scalar_comparison_tzawareness(self, op, other, tz_aware_fixture,
box_with_array):
tz = tz_aware_fixture
dti = pd.date_range('2016-01-01', periods=2, tz=tz)

# FIXME: ValueError with transpose
dtarr = tm.box_expected(dti, box_with_array, transpose=False)

with pytest.raises(TypeError):
op(dti, other)
op(dtarr, other)
with pytest.raises(TypeError):
op(other, dti)
op(other, dtarr)

@pytest.mark.parametrize('op', [operator.eq, operator.ne,
operator.gt, operator.ge,
Expand Down Expand Up @@ -558,18 +599,25 @@ def test_dti_cmp_str(self, tz_naive_fixture):

@pytest.mark.parametrize('other', ['foo', 99, 4.0,
object(), timedelta(days=2)])
def test_dti_cmp_scalar_invalid(self, other, tz_naive_fixture):
def test_dt64arr_cmp_scalar_invalid(self, other, tz_naive_fixture,
box_with_array):
# GH#22074
tz = tz_naive_fixture
xbox = box_with_array if box_with_array is not pd.Index else np.ndarray

rng = date_range('1/1/2000', periods=10, tz=tz)
# FIXME: ValueError with transpose
rng = tm.box_expected(rng, box_with_array, transpose=False)

result = rng == other
expected = np.array([False] * 10)
tm.assert_numpy_array_equal(result, expected)
expected = tm.box_expected(expected, xbox, transpose=False)
tm.assert_equal(result, expected)

result = rng != other
expected = np.array([True] * 10)
tm.assert_numpy_array_equal(result, expected)
expected = tm.box_expected(expected, xbox, transpose=False)
tm.assert_equal(result, expected)

with pytest.raises(TypeError):
rng < other
Expand Down
26 changes: 18 additions & 8 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,31 @@ def test_compare_timedelta_series(self):
expected = pd.Series([False, True])
tm.assert_series_equal(actual, expected)

def test_tdi_cmp_str_invalid(self):
def test_tdi_cmp_str_invalid(self, box_with_array):
# GH#13624
xbox = box_with_array if box_with_array is not pd.Index else np.ndarray
tdi = TimedeltaIndex(['1 day', '2 days'])
tdarr = tm.box_expected(tdi, box_with_array)

for left, right in [(tdi, 'a'), ('a', tdi)]:
for left, right in [(tdarr, 'a'), ('a', tdarr)]:
with pytest.raises(TypeError):
left > right

with pytest.raises(TypeError):
# FIXME: Shouldn't this return all-False?
left == right

left >= right
with pytest.raises(TypeError):
# FIXME: Shouldn't this return all-True?
left != right
left < right
with pytest.raises(TypeError):
left <= right

result = left == right
expected = np.array([False, False], dtype=bool)
expected = tm.box_expected(expected, xbox)
tm.assert_equal(result, expected)

result = left != right
expected = np.array([True, True], dtype=bool)
expected = tm.box_expected(expected, xbox)
tm.assert_equal(result, expected)

@pytest.mark.parametrize('dtype', [None, object])
def test_comp_nat(self, dtype):
Expand Down
Loading