Skip to content

Commit ebfb4c8

Browse files
committed
Merge pull request #4909 from jtratner/add-is_-method-to-index
ENH: Add 'is_' method to Index for identity checks
2 parents 0eab187 + 0ea6de5 commit ebfb4c8

File tree

7 files changed

+121
-10
lines changed

7 files changed

+121
-10
lines changed

doc/source/release.rst

+4
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,10 @@ API Changes
198198
data - allowing metadata changes.
199199
- ``MultiIndex.astype()`` now only allows ``np.object_``-like dtypes and
200200
now returns a ``MultiIndex`` rather than an ``Index``. (:issue:`4039`)
201+
- Added ``is_`` method to ``Index`` that allows fast equality comparison of
202+
views (similar to ``np.may_share_memory`` but no false positives, and
203+
the identity changes when ``levels`` or ``labels`` are set on a ``MultiIndex``).
204+
(:issue:`4859`, :issue:`4909`)
201205

202206
- Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`)
203207
- ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`)

pandas/core/index.py

+41-3
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ def _shouldbe_timestamp(obj):
5050
or tslib.is_timestamp_array(obj))
5151

5252

53+
_Identity = object
54+
55+
5356
class Index(FrozenNDArray):
5457
"""
5558
Immutable ndarray implementing an ordered, sliceable set. The basic object
@@ -87,6 +90,35 @@ class Index(FrozenNDArray):
8790

8891
_engine_type = _index.ObjectEngine
8992

93+
def is_(self, other):
94+
"""
95+
More flexible, faster check like ``is``, but one that also works through views.
96+
97+
Note: this is *not* the same as ``Index.identical()``, which checks
98+
that metadata is also the same.
99+
100+
Parameters
101+
----------
102+
other : object
103+
other object to compare against.
104+
105+
Returns
106+
-------
107+
bool : True if both share the same identity (e.g. one is a view of the other), False otherwise
108+
"""
109+
# use something other than None to be clearer
110+
return self._id is getattr(other, '_id', Ellipsis)
111+
112+
def _reset_identity(self):
113+
"Initializes or resets ``_id`` attribute with new object"
114+
self._id = _Identity()
115+
116+
def view(self, *args, **kwargs):
117+
result = super(Index, self).view(*args, **kwargs)
118+
if isinstance(result, Index):
119+
result._id = self._id
120+
return result
121+
90122
def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
91123
**kwargs):
92124

@@ -151,6 +183,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
151183
return subarr
152184

153185
def __array_finalize__(self, obj):
186+
self._reset_identity()
154187
if not isinstance(obj, type(self)):
155188
# Only relevant if array being created from an Index instance
156189
return
@@ -279,6 +312,7 @@ def set_names(self, names, inplace=False):
279312
raise TypeError("Must pass list-like as `names`.")
280313
if inplace:
281314
idx = self
315+
idx._reset_identity()
282316
else:
283317
idx = self._shallow_copy()
284318
idx._set_names(names)
@@ -554,7 +588,7 @@ def equals(self, other):
554588
"""
555589
Determines if two Index objects contain the same elements.
556590
"""
557-
if self is other:
591+
if self.is_(other):
558592
return True
559593

560594
if not isinstance(other, Index):
@@ -1536,7 +1570,7 @@ def equals(self, other):
15361570
"""
15371571
Determines if two Index objects contain the same elements.
15381572
"""
1539-
if self is other:
1573+
if self.is_(other):
15401574
return True
15411575

15421576
# if not isinstance(other, Int64Index):
@@ -1645,6 +1679,7 @@ def set_levels(self, levels, inplace=False):
16451679
idx = self
16461680
else:
16471681
idx = self._shallow_copy()
1682+
idx._reset_identity()
16481683
idx._set_levels(levels)
16491684
return idx
16501685

@@ -1683,6 +1718,7 @@ def set_labels(self, labels, inplace=False):
16831718
idx = self
16841719
else:
16851720
idx = self._shallow_copy()
1721+
idx._reset_identity()
16861722
idx._set_labels(labels)
16871723
return idx
16881724

@@ -1736,6 +1772,8 @@ def __array_finalize__(self, obj):
17361772
Update custom MultiIndex attributes when a new array is created by
17371773
numpy, e.g. when calling ndarray.view()
17381774
"""
1775+
# overridden if a view
1776+
self._reset_identity()
17391777
if not isinstance(obj, type(self)):
17401778
# Only relevant if this array is being created from an Index
17411779
# instance.
@@ -2754,7 +2792,7 @@ def equals(self, other):
27542792
--------
27552793
equal_levels
27562794
"""
2757-
if self is other:
2795+
if self.is_(other):
27582796
return True
27592797

27602798
if not isinstance(other, MultiIndex):

pandas/tests/test_index.py

+45
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,28 @@ def test_identical(self):
192192
i2 = i2.rename('foo')
193193
self.assert_(i1.identical(i2))
194194

195+
def test_is_(self):
196+
ind = Index(range(10))
197+
self.assertTrue(ind.is_(ind))
198+
self.assertTrue(ind.is_(ind.view().view().view().view()))
199+
self.assertFalse(ind.is_(Index(range(10))))
200+
self.assertFalse(ind.is_(ind.copy()))
201+
self.assertFalse(ind.is_(ind.copy(deep=False)))
202+
self.assertFalse(ind.is_(ind[:]))
203+
self.assertFalse(ind.is_(ind.view(np.ndarray).view(Index)))
204+
self.assertFalse(ind.is_(np.array(range(10))))
205+
self.assertTrue(ind.is_(ind.view().base)) # quasi-implementation dependent
206+
ind2 = ind.view()
207+
ind2.name = 'bob'
208+
self.assertTrue(ind.is_(ind2))
209+
self.assertTrue(ind2.is_(ind))
210+
# doesn't matter if Indices are *actually* views of the same underlying data
211+
self.assertFalse(ind.is_(Index(ind.values)))
212+
arr = np.array(range(1, 11))
213+
ind1 = Index(arr, copy=False)
214+
ind2 = Index(arr, copy=False)
215+
self.assertFalse(ind1.is_(ind2))
216+
195217
def test_asof(self):
196218
d = self.dateIndex[0]
197219
self.assert_(self.dateIndex.asof(d) is d)
@@ -1719,6 +1741,29 @@ def test_identical(self):
17191741
mi2 = mi2.set_names(['new1','new2'])
17201742
self.assert_(mi.identical(mi2))
17211743

1744+
def test_is_(self):
1745+
mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
1746+
self.assertTrue(mi.is_(mi))
1747+
self.assertTrue(mi.is_(mi.view()))
1748+
self.assertTrue(mi.is_(mi.view().view().view().view()))
1749+
mi2 = mi.view()
1750+
# names are metadata, they don't change id
1751+
mi2.names = ["A", "B"]
1752+
self.assertTrue(mi2.is_(mi))
1753+
self.assertTrue(mi.is_(mi2))
1754+
self.assertTrue(mi.is_(mi.set_names(["C", "D"])))
1755+
# levels are inherent properties, they change identity
1756+
mi3 = mi2.set_levels([lrange(10), lrange(10)])
1757+
self.assertFalse(mi3.is_(mi2))
1758+
# shouldn't change
1759+
self.assertTrue(mi2.is_(mi))
1760+
mi4 = mi3.view()
1761+
mi4.set_levels([[1 for _ in range(10)], lrange(10)], inplace=True)
1762+
self.assertFalse(mi4.is_(mi3))
1763+
mi5 = mi.view()
1764+
mi5.set_levels(mi5.levels, inplace=True)
1765+
self.assertFalse(mi5.is_(mi))
1766+
17221767
def test_union(self):
17231768
piece1 = self.index[:5][::-1]
17241769
piece2 = self.index[3:]

pandas/tseries/index.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE,
1010
is_list_like,_values_from_object, _maybe_box)
11-
from pandas.core.index import Index, Int64Index
11+
from pandas.core.index import Index, Int64Index, _Identity
1212
import pandas.compat as compat
1313
from pandas.compat import u
1414
from pandas.tseries.frequencies import (
@@ -1029,6 +1029,7 @@ def __array_finalize__(self, obj):
10291029
self.offset = getattr(obj, 'offset', None)
10301030
self.tz = getattr(obj, 'tz', None)
10311031
self.name = getattr(obj, 'name', None)
1032+
self._reset_identity()
10321033

10331034
def intersection(self, other):
10341035
"""
@@ -1446,7 +1447,7 @@ def equals(self, other):
14461447
"""
14471448
Determines if two Index objects contain the same elements.
14481449
"""
1449-
if self is other:
1450+
if self.is_(other):
14501451
return True
14511452

14521453
if (not hasattr(other, 'inferred_type') or

pandas/tseries/period.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -812,7 +812,7 @@ def equals(self, other):
812812
"""
813813
Determines if two Index objects contain the same elements.
814814
"""
815-
if self is other:
815+
if self.is_(other):
816816
return True
817817

818818
return np.array_equal(self.asi8, other.asi8)
@@ -1076,6 +1076,7 @@ def __array_finalize__(self, obj):
10761076

10771077
self.freq = getattr(obj, 'freq', None)
10781078
self.name = getattr(obj, 'name', None)
1079+
self._reset_identity()
10791080

10801081
def __repr__(self):
10811082
output = com.pprint_thing(self.__class__) + '\n'

pandas/tseries/tests/test_period.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -1054,9 +1054,6 @@ def test_conv_secondly(self):
10541054

10551055

10561056
class TestPeriodIndex(TestCase):
1057-
def __init__(self, *args, **kwds):
1058-
TestCase.__init__(self, *args, **kwds)
1059-
10601057
def setUp(self):
10611058
pass
10621059

@@ -1168,6 +1165,25 @@ def test_constructor_datetime64arr(self):
11681165

11691166
self.assertRaises(ValueError, PeriodIndex, vals, freq='D')
11701167

1168+
def test_is_(self):
1169+
create_index = lambda: PeriodIndex(freq='A', start='1/1/2001',
1170+
end='12/1/2009')
1171+
index = create_index()
1172+
self.assertTrue(index.is_(index))
1173+
self.assertFalse(index.is_(create_index()))
1174+
self.assertTrue(index.is_(index.view()))
1175+
self.assertTrue(index.is_(index.view().view().view().view().view()))
1176+
self.assertTrue(index.view().is_(index))
1177+
ind2 = index.view()
1178+
index.name = "Apple"
1179+
self.assertTrue(ind2.is_(index))
1180+
self.assertFalse(index.is_(index[:]))
1181+
self.assertFalse(index.is_(index.asfreq('M')))
1182+
self.assertFalse(index.is_(index.asfreq('A')))
1183+
self.assertFalse(index.is_(index - 2))
1184+
self.assertFalse(index.is_(index - 0))
1185+
1186+
11711187
def test_comp_period(self):
11721188
idx = period_range('2007-01', periods=20, freq='M')
11731189

pandas/tseries/tests/test_timeseries.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,12 @@ def assert_range_equal(left, right):
274274
class TestTimeSeries(unittest.TestCase):
275275
_multiprocess_can_split_ = True
276276

277+
def test_is_(self):
278+
dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M')
279+
self.assertTrue(dti.is_(dti))
280+
self.assertTrue(dti.is_(dti.view()))
281+
self.assertFalse(dti.is_(dti.copy()))
282+
277283
def test_dti_slicing(self):
278284
dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M')
279285
dti2 = dti[[1, 3, 5]]
@@ -655,7 +661,7 @@ def test_index_astype_datetime64(self):
655661
idx = Index([datetime(2012, 1, 1)], dtype=object)
656662

657663
if np.__version__ >= '1.7':
658-
raise nose.SkipTest
664+
raise nose.SkipTest("Test requires numpy < 1.7")
659665

660666
casted = idx.astype(np.dtype('M8[D]'))
661667
expected = DatetimeIndex(idx.values)

0 commit comments

Comments
 (0)