Skip to content

Commit c740bb0

Browse files
Merge pull request #9947 from mortada/index_compare
operator equal on Index should behave similarly to Series
2 parents 5c906ff + d5ff457 commit c740bb0

File tree

4 files changed

+182
-56
lines changed

4 files changed

+182
-56
lines changed

doc/source/basics.rst

+45-3
Original file line numberDiff line numberDiff line change
@@ -240,14 +240,14 @@ way to summarize a boolean result.
240240

241241
.. ipython:: python
242242
243-
(df>0).all()
244-
(df>0).any()
243+
(df > 0).all()
244+
(df > 0).any()
245245
246246
You can reduce to a final boolean value.
247247

248248
.. ipython:: python
249249
250-
(df>0).any().any()
250+
(df > 0).any().any()
251251
252252
You can test if a pandas object is empty, via the :attr:`~DataFrame.empty` property.
253253

@@ -330,6 +330,48 @@ equality to be True:
330330
df1.equals(df2)
331331
df1.equals(df2.sort())
332332
333+
Comparing array-like objects
334+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
335+
336+
You can conveniently do element-wise comparisons when comparing a pandas
337+
data structure with a scalar value:
338+
339+
.. ipython:: python
340+
341+
pd.Series(['foo', 'bar', 'baz']) == 'foo'
342+
pd.Index(['foo', 'bar', 'baz']) == 'foo'
343+
344+
Pandas also handles element-wise comparisons between different array-like
345+
objects of the same length:
346+
347+
.. ipython:: python
348+
349+
pd.Series(['foo', 'bar', 'baz']) == pd.Index(['foo', 'bar', 'qux'])
350+
pd.Series(['foo', 'bar', 'baz']) == np.array(['foo', 'bar', 'qux'])
351+
352+
Trying to compare ``Index`` or ``Series`` objects of different lengths will
353+
raise a ``ValueError``:
354+
355+
.. code-block:: python
356+
357+
In [55]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo', 'bar'])
358+
ValueError: Series lengths must match to compare
359+
360+
In [56]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo'])
361+
ValueError: Series lengths must match to compare
362+
363+
Note that this is different from the numpy behavior where a comparison can
364+
be broadcast:
365+
366+
.. ipython:: python
367+
368+
np.array([1, 2, 3]) == np.array([2])
369+
370+
or it can return False if broadcasting cannot be done:
371+
372+
.. ipython:: python
373+
374+
np.array([1, 2, 3]) == np.array([1, 2])
333375
334376
Combining overlapping data sets
335377
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

doc/source/whatsnew/v0.17.0.txt

+71
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,76 @@ Old New
152152
to do nothing, and so it is necessary to pass at least one conversion target
153153
in the method call.
154154

155+
- Operator equal on Index should behave similarly to Series (:issue:`9947`)
156+
157+
Starting in v0.17.0, comparing ``Index`` objects of different lengths will raise
158+
a ``ValueError``. This is to be consistent with the behavior of ``Series``.
159+
160+
Previous behavior:
161+
162+
.. code-block:: python
163+
164+
In [2]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5])
165+
Out[2]: array([ True, False, False], dtype=bool)
166+
167+
In [3]: pd.Index([1, 2, 3]) == pd.Index([2])
168+
Out[3]: array([False, True, False], dtype=bool)
169+
170+
In [4]: pd.Index([1, 2, 3]) == pd.Index([1, 2])
171+
Out[4]: False
172+
173+
In [5]: pd.Series([1, 2, 3]) == pd.Series([1, 4, 5])
174+
Out[5]:
175+
0 True
176+
1 False
177+
2 False
178+
dtype: bool
179+
180+
In [6]: pd.Series([1, 2, 3]) == pd.Series([2])
181+
ValueError: Series lengths must match to compare
182+
183+
In [7]: pd.Series([1, 2, 3]) == pd.Series([1, 2])
184+
ValueError: Series lengths must match to compare
185+
186+
New behavior:
187+
188+
.. code-block:: python
189+
190+
In [8]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5])
191+
Out[8]: array([ True, False, False], dtype=bool)
192+
193+
In [9]: pd.Index([1, 2, 3]) == pd.Index([2])
194+
ValueError: Lengths must match to compare
195+
196+
In [10]: pd.Index([1, 2, 3]) == pd.Index([1, 2])
197+
ValueError: Lengths must match to compare
198+
199+
In [11]: pd.Series([1, 2, 3]) == pd.Series([1, 4, 5])
200+
Out[11]:
201+
0 True
202+
1 False
203+
2 False
204+
dtype: bool
205+
206+
In [12]: pd.Series([1, 2, 3]) == pd.Series([2])
207+
ValueError: Series lengths must match to compare
208+
209+
In [13]: pd.Series([1, 2, 3]) == pd.Series([1, 2])
210+
ValueError: Series lengths must match to compare
211+
212+
Note that this is different from the ``numpy`` behavior where a comparison can
213+
be broadcast:
214+
215+
.. ipython:: python
216+
217+
np.array([1, 2, 3]) == np.array([1])
218+
219+
or it can return False if broadcasting cannot be done:
220+
221+
.. ipython:: python
222+
223+
np.array([1, 2, 3]) == np.array([1, 2])
224+
155225
.. _whatsnew_0170.api_breaking.other:
156226

157227
Other API Changes
@@ -256,3 +326,4 @@ Bug Fixes
256326

257327
- Bug in ``Series.plot(kind='hist')`` Y Label not informative (:issue:`10485`)
258328

329+
- Bug in operator equal on Index not being consistent with Series (:issue:`9947`)

pandas/core/index.py

+3
Original file line numberDiff line numberDiff line change
@@ -2593,6 +2593,9 @@ def _add_comparison_methods(cls):
25932593
def _make_compare(op):
25942594

25952595
def _evaluate_compare(self, other):
2596+
if isinstance(other, (np.ndarray, Index, ABCSeries)):
2597+
if other.ndim > 0 and len(self) != len(other):
2598+
raise ValueError('Lengths must match to compare')
25962599
func = getattr(self.values, op)
25972600
result = func(np.asarray(other))
25982601

pandas/tests/test_index.py

+63-53
Original file line numberDiff line numberDiff line change
@@ -1550,22 +1550,70 @@ def test_groupby(self):
15501550
tm.assert_dict_equal(groups, exp)
15511551

15521552
def test_equals_op(self):
1553-
# For issue #9785
1553+
# GH9947
15541554
index_a = Index(['foo', 'bar', 'baz'])
15551555
index_b = Index(['foo', 'bar', 'baz', 'qux'])
1556-
# Testing Numpy Results Equivelent
1557-
assert_array_equal(
1558-
index_a.equals(index_a),
1559-
index_a == index_a
1560-
)
1561-
assert_array_equal(
1562-
index_a.equals(index_b),
1563-
index_a == index_b,
1564-
)
1565-
assert_array_equal(
1566-
index_b.equals(index_a),
1567-
index_b == index_a,
1568-
)
1556+
index_c = Index(['foo', 'bar', 'qux'])
1557+
index_d = Index(['foo'])
1558+
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
1559+
index_a == index_b
1560+
assert_array_equal(index_a == index_a, np.array([True, True, True]))
1561+
assert_array_equal(index_a == index_c, np.array([True, True, False]))
1562+
1563+
# test comparisons with numpy arrays
1564+
array_a = np.array(['foo', 'bar', 'baz'])
1565+
array_b = np.array(['foo', 'bar', 'baz', 'qux'])
1566+
array_c = np.array(['foo', 'bar', 'qux'])
1567+
array_d = np.array(['foo'])
1568+
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
1569+
index_a == array_b
1570+
assert_array_equal(index_a == array_a, np.array([True, True, True]))
1571+
assert_array_equal(index_a == array_c, np.array([True, True, False]))
1572+
1573+
# test comparisons with Series
1574+
series_a = Series(['foo', 'bar', 'baz'])
1575+
series_b = Series(['foo', 'bar', 'baz', 'qux'])
1576+
series_c = Series(['foo', 'bar', 'qux'])
1577+
series_d = Series(['foo'])
1578+
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
1579+
index_a == series_b
1580+
assert_array_equal(index_a == series_a, np.array([True, True, True]))
1581+
assert_array_equal(index_a == series_c, np.array([True, True, False]))
1582+
1583+
# cases where length is 1 for one of them
1584+
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
1585+
index_a == index_d
1586+
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
1587+
index_a == series_d
1588+
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
1589+
index_a == array_d
1590+
with tm.assertRaisesRegexp(ValueError, "Series lengths must match"):
1591+
series_a == series_d
1592+
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
1593+
series_a == array_d
1594+
1595+
# comparing with scalar should broadcast
1596+
assert_array_equal(index_a == 'foo', np.array([True, False, False]))
1597+
assert_array_equal(series_a == 'foo', np.array([True, False, False]))
1598+
assert_array_equal(array_a == 'foo', np.array([True, False, False]))
1599+
1600+
# GH9785
1601+
# test comparisons of multiindex
1602+
from pandas.compat import StringIO
1603+
df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1])
1604+
assert_array_equal(df.index == df.index, np.array([True, True]))
1605+
1606+
mi1 = MultiIndex.from_tuples([(1, 2), (4, 5)])
1607+
assert_array_equal(df.index == mi1, np.array([True, True]))
1608+
mi2 = MultiIndex.from_tuples([(1, 2), (4, 6)])
1609+
assert_array_equal(df.index == mi2, np.array([True, False]))
1610+
mi3 = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)])
1611+
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
1612+
df.index == mi3
1613+
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
1614+
df.index == index_a
1615+
assert_array_equal(index_a == mi3, np.array([False, False, False]))
1616+
15691617

15701618
class TestCategoricalIndex(Base, tm.TestCase):
15711619
_holder = CategoricalIndex
@@ -4815,47 +4863,9 @@ def test_index_name_retained(self):
48154863
tm.assert_frame_equal(result, df_expected)
48164864

48174865
def test_equals_operator(self):
4818-
# For issue #9785
4866+
# GH9785
48194867
self.assertTrue((self.index == self.index).all())
48204868

4821-
def test_index_compare(self):
4822-
# For issue #9785
4823-
index_unequal = Index(['foo', 'bar', 'baz'])
4824-
index_equal = Index([
4825-
('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
4826-
('baz', 'two'), ('qux', 'one'), ('qux', 'two')
4827-
], tupleize_cols=False)
4828-
# Testing Numpy Results Equivelent
4829-
assert_array_equal(
4830-
index_unequal.equals(self.index),
4831-
index_unequal == self.index,
4832-
err_msg = 'Index compared with MultiIndex failed',
4833-
)
4834-
assert_array_equal(
4835-
self.index.equals(index_unequal),
4836-
self.index == index_unequal,
4837-
err_msg = 'MultiIndex compared with Index failed',
4838-
)
4839-
assert_array_equal(
4840-
self.index.equals(index_equal),
4841-
self.index == index_equal,
4842-
err_msg = 'MultiIndex compared with Similar Index failed',
4843-
)
4844-
assert_array_equal(
4845-
index_equal.equals(self.index),
4846-
index_equal == self.index,
4847-
err_msg = 'Index compared with Similar MultiIndex failed',
4848-
)
4849-
# Testing that the result is true for the index_equal case
4850-
self.assertTrue(
4851-
(self.index == index_equal).all(),
4852-
msg='Assert Index compared with Similar MultiIndex match'
4853-
)
4854-
self.assertTrue(
4855-
(index_equal == self.index).all(),
4856-
msg='Assert MultiIndex compared with Similar Index match'
4857-
)
4858-
48594869

48604870
def test_get_combined_index():
48614871
from pandas.core.index import _get_combined_index

0 commit comments

Comments
 (0)