diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 777bc01e71833..13155eb21320e 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1454,6 +1454,7 @@ Bug Fixes - Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`) - Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`) - Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`) +- Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`) - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 15cd2064624d9..e2e95edecc8f8 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1618,8 +1618,15 @@ def equals(self, other): if not isinstance(other, Index): return False - return array_equivalent(_values_from_object(self), - _values_from_object(other)) + if is_object_dtype(self) and not is_object_dtype(other): + # if other is not object, use other's logic for coercion + return other.equals(self) + + try: + return array_equivalent(_values_from_object(self), + _values_from_object(other)) + except: + return False def identical(self, other): """Similar to equals, but check that other comparable attributes are diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 251886ebdd974..71c5d7ed1a94b 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -196,6 +196,9 @@ def equals(self, other): if self.is_(other): return True + if not isinstance(other, Index): + return False + try: other = self._is_dtype_compat(other) return array_equivalent(self._data, other) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 618bc319c3f74..f42410fcdf098 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1436,6 +1436,7 @@ def reindex(self, target, method=None, level=None, limit=None, return_indexers=True, keep_order=False) else: + target = _ensure_index(target) if self.equals(target): indexer = None else: @@ -1984,6 +1985,9 @@ def equals(self, other): if self.is_(other): return True + if not isinstance(other, Index): + return False + if not isinstance(other, MultiIndex): return array_equivalent(self._values, _values_from_object(_ensure_index(other))) diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index e1ac0939812f6..b9625f3aaff92 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -7,7 +7,7 @@ from pandas.types.common import (is_dtype_equal, pandas_dtype, is_float_dtype, is_object_dtype, is_integer_dtype, is_scalar) -from pandas.types.missing import array_equivalent, isnull +from pandas.types.missing import isnull from pandas.core.common import _values_from_object from pandas import compat @@ -160,16 +160,6 @@ def _convert_scalar_indexer(self, key, kind=None): return (super(Int64Index, self) ._convert_scalar_indexer(key, kind=kind)) - def equals(self, other): - """ - Determines if two Index objects contain the same elements. - """ - if self.is_(other): - return True - - return array_equivalent(_values_from_object(self), - _values_from_object(other)) - def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None return Int64Index(joined, name=name) @@ -306,6 +296,9 @@ def equals(self, other): if self is other: return True + if not isinstance(other, Index): + return False + # need to compare nans locations and make sure that they are the same # since nans don't compare equal this is a bit tricky try: diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 2c8031898c78e..773f20532e4ff 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -650,6 +650,20 @@ def test_delete_base(self): # either depending on numpy version result = idx.delete(len(idx)) + def test_equals(self): + + for name, idx in compat.iteritems(self.indices): + self.assertTrue(idx.equals(idx)) + self.assertTrue(idx.equals(idx.copy())) + self.assertTrue(idx.equals(idx.astype(object))) + + self.assertFalse(idx.equals(list(idx))) + self.assertFalse(idx.equals(np.array(idx))) + + if idx.nlevels == 1: + # do not test MultiIndex + self.assertFalse(idx.equals(pd.Series(idx))) + def test_equals_op(self): # GH9947, GH10637 index_a = self.create_index() diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 66a5a155dd7a5..0ef7e6bf3be97 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -400,7 +400,7 @@ def test_astype(self): casted = self.intIndex.astype('i8') self.assertEqual(casted.name, 'foobar') - def test_equals(self): + def test_equals_object(self): # same self.assertTrue(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'c']))) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index b0e50491b8e9d..5d2544e5f6478 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -522,7 +522,7 @@ def test_ensure_copied_data(self): result = CategoricalIndex(index.values, copy=False) self.assertIs(_base(index.values), _base(result.values)) - def test_equals(self): + def test_equals_categorical(self): ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], @@ -556,19 +556,30 @@ def test_equals(self): # tests # make sure that we are testing for category inclusion properly - self.assertTrue(CategoricalIndex( - list('aabca'), categories=['c', 'a', 'b']).equals(list('aabca'))) + ci = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b']) + self.assertFalse(ci.equals(list('aabca'))) + self.assertFalse(ci.equals(CategoricalIndex(list('aabca')))) + self.assertTrue(ci.equals(ci.copy())) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + ci = CategoricalIndex(list('aabca'), + categories=['c', 'a', 'b', np.nan]) + self.assertFalse(ci.equals(list('aabca'))) + self.assertFalse(ci.equals(CategoricalIndex(list('aabca')))) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(CategoricalIndex( - list('aabca'), categories=['c', 'a', 'b', np.nan]).equals(list( - 'aabca'))) - - self.assertFalse(CategoricalIndex( - list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list( - 'aabca'))) - self.assertTrue(CategoricalIndex( - list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list( - 'aabca') + [np.nan])) + self.assertTrue(ci.equals(ci.copy())) + + ci = CategoricalIndex(list('aabca') + [np.nan], + categories=['c', 'a', 'b']) + self.assertFalse(ci.equals(list('aabca'))) + self.assertFalse(ci.equals(CategoricalIndex(list('aabca')))) + self.assertTrue(ci.equals(ci.copy())) + + ci = CategoricalIndex(list('aabca') + [np.nan], + categories=['c', 'a', 'b']) + self.assertFalse(ci.equals(list('aabca') + [np.nan])) + self.assertFalse(ci.equals(CategoricalIndex(list('aabca') + [np.nan]))) + self.assertTrue(ci.equals(ci.copy())) def test_string_categorical_index_repr(self): # short diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 675193e1538b2..602cfabbc26b7 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1232,7 +1232,7 @@ def test_to_hierarchical(self): def test_bounds(self): self.index._bounds - def test_equals(self): + def test_equals_multi(self): self.assertTrue(self.index.equals(self.index)) self.assertTrue(self.index.equal_levels(self.index)) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index f0af43e3513bb..d3a89b301ae46 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -265,7 +265,7 @@ def test_astype(self): i = Float64Index([0, 1.1, np.NAN]) self.assertRaises(ValueError, lambda: i.astype(dtype)) - def test_equals(self): + def test_equals_numeric(self): i = Float64Index([1.0, 2.0]) self.assertTrue(i.equals(i)) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 168ef7fc8d100..b0b8864521666 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -337,7 +337,7 @@ def test_is_monotonic(self): self.assertTrue(index.is_monotonic_increasing) self.assertTrue(index.is_monotonic_decreasing) - def test_equals(self): + def test_equals_range(self): equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), (RangeIndex(0), RangeIndex(1, -1, 3)), (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)), diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index f0c6e334925c4..e9a5d6508a1e8 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -11,7 +11,7 @@ import numpy as np from pandas.types.common import (is_integer, is_float, is_bool_dtype, _ensure_int64, - is_scalar, + is_scalar, is_dtype_equal, is_list_like) from pandas.types.generic import (ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass) @@ -108,6 +108,34 @@ def ceil(self, freq): class DatetimeIndexOpsMixin(object): """ common ops mixin to support a unified inteface datetimelike Index """ + def equals(self, other): + """ + Determines if two Index objects contain the same elements. + """ + if self.is_(other): + return True + + if not isinstance(other, ABCIndexClass): + return False + elif not isinstance(other, type(self)): + try: + other = type(self)(other) + except: + return False + + if not is_dtype_equal(self.dtype, other.dtype): + # have different timezone + return False + + # ToDo: Remove this when PeriodDtype is added + elif isinstance(self, ABCPeriodIndex): + if not isinstance(other, ABCPeriodIndex): + return False + if self.freq != other.freq: + return False + + return np.array_equal(self.asi8, other.asi8) + def __iter__(self): return (self._box_func(v) for v in self.asi8) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index f78574521ffeb..95ea7df510cb5 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1655,26 +1655,6 @@ def is_normalized(self): def _resolution(self): return period.resolution(self.asi8, self.tz) - def equals(self, other): - """ - Determines if two Index objects contain the same elements. - """ - if self.is_(other): - return True - - if (not hasattr(other, 'inferred_type') or - other.inferred_type != 'datetime64'): - if self.offset is not None: - return False - try: - other = DatetimeIndex(other) - except: - return False - - if self._has_same_tz(other): - return np.array_equal(self.asi8, other.asi8) - return False - def insert(self, loc, item): """ Make new Index inserting new item at location diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 7fb0f19b04486..5b2cb0754e319 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -596,21 +596,6 @@ def _mpl_repr(self): # how to represent ourselves to matplotlib return self.asobject.values - def equals(self, other): - """ - Determines if two Index objects contain the same elements. - """ - if self.is_(other): - return True - - if not isinstance(other, PeriodIndex): - try: - other = PeriodIndex(other) - except: - return False - - return np.array_equal(self.asi8, other.asi8) - def to_timestamp(self, freq=None, how='start'): """ Cast to DatetimeIndex diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index a17eda3ac4288..d8d9c2feb8b89 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -834,22 +834,6 @@ def dtype(self): def is_all_dates(self): return True - def equals(self, other): - """ - Determines if two Index objects contain the same elements. - """ - if self.is_(other): - return True - - if (not hasattr(other, 'inferred_type') or - other.inferred_type != 'timedelta64'): - try: - other = TimedeltaIndex(other) - except: - return False - - return np.array_equal(self.asi8, other.asi8) - def insert(self, loc, item): """ Make new Index inserting new item at location diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index aa13591a4ff30..96ff74c819624 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -819,6 +819,37 @@ def test_nat(self): tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.int64)) + def test_equals(self): + # GH 13107 + for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: + idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) + self.assertTrue(idx.equals(idx)) + self.assertTrue(idx.equals(idx.copy())) + self.assertTrue(idx.equals(idx.asobject)) + self.assertTrue(idx.asobject.equals(idx)) + self.assertTrue(idx.asobject.equals(idx.asobject)) + self.assertFalse(idx.equals(list(idx))) + self.assertFalse(idx.equals(pd.Series(idx))) + + idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], + tz='US/Pacific') + self.assertFalse(idx.equals(idx2)) + self.assertFalse(idx.equals(idx2.copy())) + self.assertFalse(idx.equals(idx2.asobject)) + self.assertFalse(idx.asobject.equals(idx2)) + self.assertFalse(idx.equals(list(idx2))) + self.assertFalse(idx.equals(pd.Series(idx2))) + + # same internal, different tz + idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + self.assertFalse(idx.equals(idx3)) + self.assertFalse(idx.equals(idx3.copy())) + self.assertFalse(idx.equals(idx3.asobject)) + self.assertFalse(idx.asobject.equals(idx3)) + self.assertFalse(idx.equals(list(idx3))) + self.assertFalse(idx.equals(pd.Series(idx3))) + class TestTimedeltaIndexOps(Ops): def setUp(self): @@ -1682,6 +1713,26 @@ def test_nat(self): tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.int64)) + def test_equals(self): + # GH 13107 + idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT']) + self.assertTrue(idx.equals(idx)) + self.assertTrue(idx.equals(idx.copy())) + self.assertTrue(idx.equals(idx.asobject)) + self.assertTrue(idx.asobject.equals(idx)) + self.assertTrue(idx.asobject.equals(idx.asobject)) + self.assertFalse(idx.equals(list(idx))) + self.assertFalse(idx.equals(pd.Series(idx))) + + idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT']) + self.assertFalse(idx.equals(idx2)) + self.assertFalse(idx.equals(idx2.copy())) + self.assertFalse(idx.equals(idx2.asobject)) + self.assertFalse(idx.asobject.equals(idx2)) + self.assertFalse(idx.asobject.equals(idx2.asobject)) + self.assertFalse(idx.equals(list(idx2))) + self.assertFalse(idx.equals(pd.Series(idx2))) + class TestPeriodIndexOps(Ops): def setUp(self): @@ -2646,6 +2697,38 @@ def test_nat(self): tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.int64)) + def test_equals(self): + # GH 13107 + for freq in ['D', 'M']: + idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], + freq=freq) + self.assertTrue(idx.equals(idx)) + self.assertTrue(idx.equals(idx.copy())) + self.assertTrue(idx.equals(idx.asobject)) + self.assertTrue(idx.asobject.equals(idx)) + self.assertTrue(idx.asobject.equals(idx.asobject)) + self.assertFalse(idx.equals(list(idx))) + self.assertFalse(idx.equals(pd.Series(idx))) + + idx2 = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], + freq='H') + self.assertFalse(idx.equals(idx2)) + self.assertFalse(idx.equals(idx2.copy())) + self.assertFalse(idx.equals(idx2.asobject)) + self.assertFalse(idx.asobject.equals(idx2)) + self.assertFalse(idx.equals(list(idx2))) + self.assertFalse(idx.equals(pd.Series(idx2))) + + # same internal, different tz + idx3 = pd.PeriodIndex._simple_new(idx.asi8, freq='H') + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + self.assertFalse(idx.equals(idx3)) + self.assertFalse(idx.equals(idx3.copy())) + self.assertFalse(idx.equals(idx3.asobject)) + self.assertFalse(idx.asobject.equals(idx3)) + self.assertFalse(idx.equals(list(idx3))) + self.assertFalse(idx.equals(pd.Series(idx3))) + if __name__ == '__main__': import nose diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 77e0216c5c79a..ab413af897215 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -1523,7 +1523,7 @@ def test_misc_coverage(self): tm.assertIsInstance(list(result.values())[0][0], Timedelta) idx = TimedeltaIndex(['3d', '1d', '2d']) - self.assertTrue(idx.equals(list(idx))) + self.assertFalse(idx.equals(list(idx))) non_td = Index(list('abc')) self.assertFalse(idx.equals(list(non_td))) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 2355d663ed7d5..5ce0bdffe7ad4 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3032,7 +3032,7 @@ def test_misc_coverage(self): tm.assertIsInstance(list(result.values())[0][0], Timestamp) idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) - self.assertTrue(idx.equals(list(idx))) + self.assertFalse(idx.equals(list(idx))) non_datetime = Index(list('abc')) self.assertFalse(idx.equals(list(non_datetime)))