Skip to content

BUG/CLN: datetimelike Index.equals may return True with non-Index #13986

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,7 @@ Bug Fixes
- Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`)
- Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`)
- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`)
- Bug in ``DatetimeIndex.equals()``, ``TimedeltaIndex.equals()`` and ``PeriodIndex.equals()`` which could return ``True`` when the input isn't an ``Index`` but contains the same values (:issue:`13107`)


- Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
Expand Down
11 changes: 9 additions & 2 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1618,8 +1618,15 @@ def equals(self, other):
if not isinstance(other, Index):
return False

return array_equivalent(_values_from_object(self),
_values_from_object(other))
if is_object_dtype(self) and not is_object_dtype(other):
# if other is not object, use other's logic for coercion
return other.equals(self)

try:
return array_equivalent(_values_from_object(self),
_values_from_object(other))
except:
return False

def identical(self, other):
"""Similar to equals, but check that other comparable attributes are
Expand Down
3 changes: 3 additions & 0 deletions pandas/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ def equals(self, other):
if self.is_(other):
return True

if not isinstance(other, Index):
return False

try:
other = self._is_dtype_compat(other)
return array_equivalent(self._data, other)
Expand Down
4 changes: 4 additions & 0 deletions pandas/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1436,6 +1436,7 @@ def reindex(self, target, method=None, level=None, limit=None,
return_indexers=True,
keep_order=False)
else:
target = _ensure_index(target)
if self.equals(target):
indexer = None
else:
Expand Down Expand Up @@ -1984,6 +1985,9 @@ def equals(self, other):
if self.is_(other):
return True

if not isinstance(other, Index):
return False

if not isinstance(other, MultiIndex):
return array_equivalent(self._values,
_values_from_object(_ensure_index(other)))
Expand Down
15 changes: 4 additions & 11 deletions pandas/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pandas.types.common import (is_dtype_equal, pandas_dtype,
is_float_dtype, is_object_dtype,
is_integer_dtype, is_scalar)
from pandas.types.missing import array_equivalent, isnull
from pandas.types.missing import isnull
from pandas.core.common import _values_from_object

from pandas import compat
Expand Down Expand Up @@ -160,16 +160,6 @@ def _convert_scalar_indexer(self, key, kind=None):
return (super(Int64Index, self)
._convert_scalar_indexer(key, kind=kind))

def equals(self, other):
    """
    Report whether ``other`` holds the same elements as this index.

    Identity (``self.is_(other)``) short-circuits to True; otherwise the
    underlying values of both objects are compared with
    ``array_equivalent``.
    """
    if not self.is_(other):
        own_values = _values_from_object(self)
        other_values = _values_from_object(other)
        return array_equivalent(own_values, other_values)
    return True

def _wrap_joined_index(self, joined, other):
name = self.name if self.name == other.name else None
return Int64Index(joined, name=name)
Expand Down Expand Up @@ -306,6 +296,9 @@ def equals(self, other):
if self is other:
return True

if not isinstance(other, Index):
return False

# need to compare nans locations and make sure that they are the same
# since nans don't compare equal this is a bit tricky
try:
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,20 @@ def test_delete_base(self):
# either depending on numpy version
result = idx.delete(len(idx))

def test_equals(self):
    # Every index must equal itself, its copy and its object-dtype cast,
    # and must not equal a non-Index container holding the same values.
    for _, index in compat.iteritems(self.indices):
        self.assertTrue(index.equals(index))
        self.assertTrue(index.equals(index.copy()))
        self.assertTrue(index.equals(index.astype(object)))

        self.assertFalse(index.equals(list(index)))
        self.assertFalse(index.equals(np.array(index)))

        if index.nlevels != 1:
            # do not test MultiIndex
            continue
        self.assertFalse(index.equals(pd.Series(index)))

def test_equals_op(self):
# GH9947, GH10637
index_a = self.create_index()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ def test_astype(self):
casted = self.intIndex.astype('i8')
self.assertEqual(casted.name, 'foobar')

def test_equals(self):
def test_equals_object(self):
# same
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test doesn't propagate to the other test classes, so maybe we need a more generic one?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nvm. I see we override this in the sub-classes.

self.assertTrue(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'c'])))

Expand Down
37 changes: 24 additions & 13 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ def test_ensure_copied_data(self):
result = CategoricalIndex(index.values, copy=False)
self.assertIs(_base(index.values), _base(result.values))

def test_equals(self):
def test_equals_categorical(self):

ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
Expand Down Expand Up @@ -556,19 +556,30 @@ def test_equals(self):

# tests
# make sure that we are testing for category inclusion properly
self.assertTrue(CategoricalIndex(
list('aabca'), categories=['c', 'a', 'b']).equals(list('aabca')))
ci = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b'])
self.assertFalse(ci.equals(list('aabca')))
self.assertFalse(ci.equals(CategoricalIndex(list('aabca'))))
self.assertTrue(ci.equals(ci.copy()))

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
ci = CategoricalIndex(list('aabca'),
categories=['c', 'a', 'b', np.nan])
self.assertFalse(ci.equals(list('aabca')))
self.assertFalse(ci.equals(CategoricalIndex(list('aabca'))))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.assertTrue(CategoricalIndex(
list('aabca'), categories=['c', 'a', 'b', np.nan]).equals(list(
'aabca')))

self.assertFalse(CategoricalIndex(
list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list(
'aabca')))
self.assertTrue(CategoricalIndex(
list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list(
'aabca') + [np.nan]))
self.assertTrue(ci.equals(ci.copy()))

ci = CategoricalIndex(list('aabca') + [np.nan],
categories=['c', 'a', 'b'])
self.assertFalse(ci.equals(list('aabca')))
self.assertFalse(ci.equals(CategoricalIndex(list('aabca'))))
self.assertTrue(ci.equals(ci.copy()))

ci = CategoricalIndex(list('aabca') + [np.nan],
categories=['c', 'a', 'b'])
self.assertFalse(ci.equals(list('aabca') + [np.nan]))
self.assertFalse(ci.equals(CategoricalIndex(list('aabca') + [np.nan])))
self.assertTrue(ci.equals(ci.copy()))

def test_string_categorical_index_repr(self):
# short
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1232,7 +1232,7 @@ def test_to_hierarchical(self):
def test_bounds(self):
self.index._bounds

def test_equals(self):
def test_equals_multi(self):
self.assertTrue(self.index.equals(self.index))
self.assertTrue(self.index.equal_levels(self.index))

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def test_astype(self):
i = Float64Index([0, 1.1, np.NAN])
self.assertRaises(ValueError, lambda: i.astype(dtype))

def test_equals(self):
def test_equals_numeric(self):

i = Float64Index([1.0, 2.0])
self.assertTrue(i.equals(i))
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def test_is_monotonic(self):
self.assertTrue(index.is_monotonic_increasing)
self.assertTrue(index.is_monotonic_decreasing)

def test_equals(self):
def test_equals_range(self):
equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)),
(RangeIndex(0), RangeIndex(1, -1, 3)),
(RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)),
Expand Down
30 changes: 29 additions & 1 deletion pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import numpy as np
from pandas.types.common import (is_integer, is_float,
is_bool_dtype, _ensure_int64,
is_scalar,
is_scalar, is_dtype_equal,
is_list_like)
from pandas.types.generic import (ABCIndex, ABCSeries,
ABCPeriodIndex, ABCIndexClass)
Expand Down Expand Up @@ -108,6 +108,34 @@ def ceil(self, freq):
class DatetimeIndexOpsMixin(object):
""" common ops mixin to support a unified inteface datetimelike Index """

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.

    Returns True immediately when ``other`` is the same object as
    ``self`` (per ``self.is_``).  Returns False when ``other`` is not an
    index class instance, when it cannot be coerced to ``type(self)``,
    when the dtypes differ, or (for period indexes) when ``other`` is not
    a period index or the frequencies differ.  Otherwise the result is
    the element-wise comparison of the two ``asi8`` arrays.
    """
    if self.is_(other):
        return True

    if not isinstance(other, ABCIndexClass):
        return False
    elif not isinstance(other, type(self)):
        try:
            other = type(self)(other)
        except Exception:
            # Coercion failure means "not equal".  Deliberately not a bare
            # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
            return False

    if not is_dtype_equal(self.dtype, other.dtype):
        # have different timezone
        return False

    # ToDo: Remove this when PeriodDtype is added
    elif isinstance(self, ABCPeriodIndex):
        if not isinstance(other, ABCPeriodIndex):
            return False
        if self.freq != other.freq:
            return False

    return np.array_equal(self.asi8, other.asi8)

def __iter__(self):
return (self._box_func(v) for v in self.asi8)

Expand Down
20 changes: 0 additions & 20 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1655,26 +1655,6 @@ def is_normalized(self):
def _resolution(self):
return period.resolution(self.asi8, self.tz)

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.

    Returns True immediately when ``other`` is the same object as
    ``self`` (per ``self.is_``).  When ``other`` lacks an
    ``inferred_type`` of ``'datetime64'`` it is coerced with
    ``DatetimeIndex(other)``; that path returns False if ``self.offset``
    is set or the coercion fails.  The ``asi8`` arrays are compared only
    when ``self._has_same_tz(other)`` holds; otherwise the result is
    False.
    """
    if self.is_(other):
        return True

    if (not hasattr(other, 'inferred_type') or
            other.inferred_type != 'datetime64'):
        if self.offset is not None:
            return False
        try:
            other = DatetimeIndex(other)
        except Exception:
            # Coercion failure means "not equal".  Deliberately not a bare
            # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
            return False

    if self._has_same_tz(other):
        return np.array_equal(self.asi8, other.asi8)
    return False

def insert(self, loc, item):
"""
Make new Index inserting new item at location
Expand Down
15 changes: 0 additions & 15 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,21 +596,6 @@ def _mpl_repr(self):
# how to represent ourselves to matplotlib
return self.asobject.values

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.

    Returns True immediately when ``other`` is the same object as
    ``self`` (per ``self.is_``).  A non-``PeriodIndex`` input is coerced
    with ``PeriodIndex(other)``; if that fails the result is False.
    Otherwise the result is the comparison of the two ``asi8`` arrays.
    """
    if self.is_(other):
        return True

    if not isinstance(other, PeriodIndex):
        try:
            other = PeriodIndex(other)
        except Exception:
            # Coercion failure means "not equal".  Deliberately not a bare
            # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
            return False

    return np.array_equal(self.asi8, other.asi8)

def to_timestamp(self, freq=None, how='start'):
"""
Cast to DatetimeIndex
Expand Down
16 changes: 0 additions & 16 deletions pandas/tseries/tdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,22 +834,6 @@ def dtype(self):
def is_all_dates(self):
return True

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.

    Returns True immediately when ``other`` is the same object as
    ``self`` (per ``self.is_``).  When ``other`` lacks an
    ``inferred_type`` of ``'timedelta64'`` it is coerced with
    ``TimedeltaIndex(other)``; if that fails the result is False.
    Otherwise the result is the comparison of the two ``asi8`` arrays.
    """
    if self.is_(other):
        return True

    if (not hasattr(other, 'inferred_type') or
            other.inferred_type != 'timedelta64'):
        try:
            other = TimedeltaIndex(other)
        except Exception:
            # Coercion failure means "not equal".  Deliberately not a bare
            # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
            return False

    return np.array_equal(self.asi8, other.asi8)

def insert(self, loc, item):
"""
Make new Index inserting new item at location
Expand Down
Loading