pandas-dev · jtratner · Sep 22, 2013 · Sep 18, 2013
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -198,6 +198,10 @@ API Changes
       data - allowing metadata changes.
     - ``MultiIndex.astype()`` now only allows ``np.object_``-like dtypes and
       now returns a ``MultiIndex`` rather than an ``Index``. (:issue:`4039`)
+   - Added ``is_`` method to ``Index`` that allows fast equality comparison of
+     views (similar to ``np.may_share_memory`` but with no false positives; the
+     identity is reset when ``levels`` or ``labels`` are set on a
+     ``MultiIndex``). (:issue:`4859`, :issue:`4909`)
 
   - Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`)
   - ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`)

diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -50,6 +50,9 @@ def _shouldbe_timestamp(obj):
             or tslib.is_timestamp_array(obj))
 
 
+_Identity = object
+
+
 class Index(FrozenNDArray):
     """
     Immutable ndarray implementing an ordered, sliceable set. The basic object
@@ -87,6 +90,35 @@ class Index(FrozenNDArray):
 
     _engine_type = _index.ObjectEngine
 
+    def is_(self, other):
+        """
+        Faster, more flexible check like ``is`` that also works through views
+
+        Note: this is *not* the same as ``Index.identical()``, which checks
+        that metadata is also the same.
+
+        Parameters
+        ----------
+        other : object
+            other object to compare against.
+
+        Returns
+        -------
+        True if both share the same identity (``_id``), False otherwise : bool
+        """
+        # Ellipsis default (clearer than None) can never match a real ``_id``
+        return self._id is getattr(other, '_id', Ellipsis)
+
+    def _reset_identity(self):
+        "Initializes or resets ``_id`` attribute with new object"
+        self._id = _Identity()
+
+    def view(self, *args, **kwargs):
+        result = super(Index, self).view(*args, **kwargs)
+        if isinstance(result, Index):
+            result._id = self._id
+        return result
+
     def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
                 **kwargs):
 
@@ -151,6 +183,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
         return subarr
 
     def __array_finalize__(self, obj):
+        self._reset_identity()
         if not isinstance(obj, type(self)):
             # Only relevant if array being created from an Index instance
             return
@@ -279,6 +312,7 @@ def set_names(self, names, inplace=False):
             raise TypeError("Must pass list-like as `names`.")
         if inplace:
             idx = self
+            idx._reset_identity()
         else:
             idx = self._shallow_copy()
         idx._set_names(names)
@@ -554,7 +588,7 @@ def equals(self, other):
         """
         Determines if two Index objects contain the same elements.
         """
-        if self is other:
+        if self.is_(other):
             return True
 
         if not isinstance(other, Index):
@@ -1536,7 +1570,7 @@ def equals(self, other):
         """
         Determines if two Index objects contain the same elements.
         """
-        if self is other:
+        if self.is_(other):
             return True
 
         # if not isinstance(other, Int64Index):
@@ -1645,6 +1679,7 @@ def set_levels(self, levels, inplace=False):
             idx = self
         else:
             idx = self._shallow_copy()
+        idx._reset_identity()
         idx._set_levels(levels)
         return idx
 
@@ -1683,6 +1718,7 @@ def set_labels(self, labels, inplace=False):
             idx = self
         else:
             idx = self._shallow_copy()
+        idx._reset_identity()
         idx._set_labels(labels)
         return idx
 
@@ -1736,6 +1772,8 @@ def __array_finalize__(self, obj):
         Update custom MultiIndex attributes when a new array is created by
         numpy, e.g. when calling ndarray.view()
         """
+        # overridden if a view
+        self._reset_identity()
         if not isinstance(obj, type(self)):
             # Only relevant if this array is being created from an Index
             # instance.
@@ -2754,7 +2792,7 @@ def equals(self, other):
         --------
         equal_levels
         """
-        if self is other:
+        if self.is_(other):
             return True
 
         if not isinstance(other, MultiIndex):

diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
@@ -192,6 +192,28 @@ def test_identical(self):
         i2 = i2.rename('foo')
         self.assert_(i1.identical(i2))
 
+    def test_is_(self):
+        ind = Index(range(10))
+        self.assertTrue(ind.is_(ind))
+        self.assertTrue(ind.is_(ind.view().view().view().view()))
+        self.assertFalse(ind.is_(Index(range(10))))
+        self.assertFalse(ind.is_(ind.copy()))
+        self.assertFalse(ind.is_(ind.copy(deep=False)))
+        self.assertFalse(ind.is_(ind[:]))
+        self.assertFalse(ind.is_(ind.view(np.ndarray).view(Index)))
+        self.assertFalse(ind.is_(np.array(range(10))))
+        self.assertTrue(ind.is_(ind.view().base)) # quasi-implementation dependent
+        ind2 = ind.view()
+        ind2.name = 'bob'
+        self.assertTrue(ind.is_(ind2))
+        self.assertTrue(ind2.is_(ind))
+        # Indices that merely share underlying data do not pass ``is_``
+        self.assertFalse(ind.is_(Index(ind.values)))
+        arr = np.array(range(1, 11))
+        ind1 = Index(arr, copy=False)
+        ind2 = Index(arr, copy=False)
+        self.assertFalse(ind1.is_(ind2))
+
     def test_asof(self):
         d = self.dateIndex[0]
         self.assert_(self.dateIndex.asof(d) is d)
@@ -1719,6 +1741,29 @@ def test_identical(self):
         mi2 = mi2.set_names(['new1','new2'])
         self.assert_(mi.identical(mi2))
 
+    def test_is_(self):
+        mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
+        self.assertTrue(mi.is_(mi))
+        self.assertTrue(mi.is_(mi.view()))
+        self.assertTrue(mi.is_(mi.view().view().view().view()))
+        mi2 = mi.view()
+        # names are metadata, they don't change id
+        mi2.names = ["A", "B"]
+        self.assertTrue(mi2.is_(mi))
+        self.assertTrue(mi.is_(mi2))
+        self.assertTrue(mi.is_(mi.set_names(["C", "D"])))
+        # levels are inherent properties, they change identity
+        mi3 = mi2.set_levels([lrange(10), lrange(10)])
+        self.assertFalse(mi3.is_(mi2))
+        # shouldn't change
+        self.assertTrue(mi2.is_(mi))
+        mi4 = mi3.view()
+        mi4.set_levels([[1 for _ in range(10)], lrange(10)], inplace=True)
+        self.assertFalse(mi4.is_(mi3))
+        mi5 = mi.view()
+        mi5.set_levels(mi5.levels, inplace=True)
+        self.assertFalse(mi5.is_(mi))
+
     def test_union(self):
         piece1 = self.index[:5][::-1]
         piece2 = self.index[3:]

diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
@@ -8,7 +8,7 @@
 
 from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE,
                                 is_list_like,_values_from_object, _maybe_box)
-from pandas.core.index import Index, Int64Index
+from pandas.core.index import Index, Int64Index, _Identity
 import pandas.compat as compat
 from pandas.compat import u
 from pandas.tseries.frequencies import (
@@ -1029,6 +1029,7 @@ def __array_finalize__(self, obj):
         self.offset = getattr(obj, 'offset', None)
         self.tz = getattr(obj, 'tz', None)
         self.name = getattr(obj, 'name', None)
+        self._reset_identity()
 
     def intersection(self, other):
         """
@@ -1446,7 +1447,7 @@ def equals(self, other):
         """
         Determines if two Index objects contain the same elements.
         """
-        if self is other:
+        if self.is_(other):
             return True
 
         if (not hasattr(other, 'inferred_type') or

diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
@@ -812,7 +812,7 @@ def equals(self, other):
         """
         Determines if two Index objects contain the same elements.
         """
-        if self is other:
+        if self.is_(other):
             return True
 
         return np.array_equal(self.asi8, other.asi8)
@@ -1076,6 +1076,7 @@ def __array_finalize__(self, obj):
 
         self.freq = getattr(obj, 'freq', None)
         self.name = getattr(obj, 'name', None)
+        self._reset_identity()
 
     def __repr__(self):
         output = com.pprint_thing(self.__class__) + '\n'

diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py
@@ -1054,9 +1054,6 @@ def test_conv_secondly(self):
 
 
 class TestPeriodIndex(TestCase):
-    def __init__(self, *args, **kwds):
-        TestCase.__init__(self, *args, **kwds)
-
     def setUp(self):
         pass
 
@@ -1168,6 +1165,25 @@ def test_constructor_datetime64arr(self):
 
         self.assertRaises(ValueError, PeriodIndex, vals, freq='D')
 
+    def test_is_(self):
+        create_index = lambda: PeriodIndex(freq='A', start='1/1/2001',
+                                           end='12/1/2009')
+        index = create_index()
+        self.assertTrue(index.is_(index))
+        self.assertFalse(index.is_(create_index()))
+        self.assertTrue(index.is_(index.view()))
+        self.assertTrue(index.is_(index.view().view().view().view().view()))
+        self.assertTrue(index.view().is_(index))
+        ind2 = index.view()
+        index.name = "Apple"
+        self.assertTrue(ind2.is_(index))
+        self.assertFalse(index.is_(index[:]))
+        self.assertFalse(index.is_(index.asfreq('M')))
+        self.assertFalse(index.is_(index.asfreq('A')))
+        self.assertFalse(index.is_(index - 2))
+        self.assertFalse(index.is_(index - 0))
+
+
     def test_comp_period(self):
         idx = period_range('2007-01', periods=20, freq='M')
 

diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
@@ -274,6 +274,12 @@ def assert_range_equal(left, right):
 class TestTimeSeries(unittest.TestCase):
     _multiprocess_can_split_ = True
 
+    def test_is_(self):
+        dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M')
+        self.assertTrue(dti.is_(dti))
+        self.assertTrue(dti.is_(dti.view()))
+        self.assertFalse(dti.is_(dti.copy()))
+
     def test_dti_slicing(self):
         dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M')
         dti2 = dti[[1, 3, 5]]
@@ -655,7 +661,7 @@ def test_index_astype_datetime64(self):
         idx = Index([datetime(2012, 1, 1)], dtype=object)
 
         if np.__version__ >= '1.7':
-            raise nose.SkipTest
+            raise nose.SkipTest("Test requires numpy < 1.7")
 
         casted = idx.astype(np.dtype('M8[D]'))
         expected = DatetimeIndex(idx.values)