pandas-dev · jorisvandenbossche · Jul 16, 2015 · Apr 20, 2015 · jorisvandenbossche · Jul 15, 2015
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
@@ -240,14 +240,14 @@ way to summarize a boolean result.
 
 .. ipython:: python
 
-   (df>0).all()
-   (df>0).any()
+   (df > 0).all()
+   (df > 0).any()
 
 You can reduce to a final boolean value.
 
 .. ipython:: python
 
-   (df>0).any().any()
+   (df > 0).any().any()
 
 You can test if a pandas object is empty, via the :attr:`~DataFrame.empty` property.
 
@@ -330,6 +330,48 @@ equality to be True:
    df1.equals(df2)
    df1.equals(df2.sort())
 
+Comparing array-like objects
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can conveniently do element-wise comparisons when comparing a pandas
+data structure with a scalar value:
+
+.. ipython:: python
+
+   pd.Series(['foo', 'bar', 'baz']) == 'foo'
+   pd.Index(['foo', 'bar', 'baz']) == 'foo'
+
+Pandas also handles element-wise comparisons between different array-like
+objects of the same length:
+
+.. ipython:: python
+
+    pd.Series(['foo', 'bar', 'baz']) == pd.Index(['foo', 'bar', 'qux'])
+    pd.Series(['foo', 'bar', 'baz']) == np.array(['foo', 'bar', 'qux'])
+
+Trying to compare ``Index`` or ``Series`` objects of different lengths will
+raise a ``ValueError``:
+
+.. code-block:: python
+
+    In [55]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo', 'bar'])
+    ValueError: Series lengths must match to compare
+
+    In [56]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo'])
+    ValueError: Series lengths must match to compare
+
+Note that this is different from the numpy behavior where a comparison can
+be broadcast:
+
+.. ipython:: python
+
+    np.array([1, 2, 3]) == np.array([2])
+
+or it can return ``False`` if broadcasting cannot be done:
+
+.. ipython:: python
+
+    np.array([1, 2, 3]) == np.array([1, 2])
 
 Combining overlapping data sets
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -46,6 +46,76 @@ Backwards incompatible API changes
 
 .. _whatsnew_0170.api_breaking:
 
+- The equality operator on ``Index`` should behave similarly to ``Series`` (:issue:`9947`)
+
+Starting in v0.17.0, comparing ``Index`` objects of different lengths will raise
+a ``ValueError``. This is to be consistent with the behavior of ``Series``.
+
+Previous behavior:
+
+.. code-block:: python
+
+   In [2]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5])
+   Out[2]: array([ True, False, False], dtype=bool)
+
+   In [3]: pd.Index([1, 2, 3]) == pd.Index([2])
+   Out[3]: array([False,  True, False], dtype=bool)
+
+   In [4]: pd.Index([1, 2, 3]) == pd.Index([1, 2])
+   Out[4]: False
+
+   In [5]: pd.Series([1, 2, 3]) == pd.Series([1, 4, 5])
+   Out[5]:
+   0     True
+   1    False
+   2    False
+   dtype: bool
+
+   In [6]: pd.Series([1, 2, 3]) == pd.Series([2])
+   ValueError: Series lengths must match to compare
+
+   In [7]: pd.Series([1, 2, 3]) == pd.Series([1, 2])
+   ValueError: Series lengths must match to compare
+
+New behavior:
+
+.. code-block:: python
+
+   In [8]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5])
+   Out[8]: array([ True, False, False], dtype=bool)
+
+   In [9]: pd.Index([1, 2, 3]) == pd.Index([2])
+   ValueError: Lengths must match to compare
+
+   In [10]: pd.Index([1, 2, 3]) == pd.Index([1, 2])
+   ValueError: Lengths must match to compare
+
+   In [11]: pd.Series([1, 2, 3]) == pd.Series([1, 4, 5])
+   Out[11]:
+   0     True
+   1    False
+   2    False
+   dtype: bool
+
+   In [12]: pd.Series([1, 2, 3]) == pd.Series([2])
+   ValueError: Series lengths must match to compare
+
+   In [13]: pd.Series([1, 2, 3]) == pd.Series([1, 2])
+   ValueError: Series lengths must match to compare
+
+Note that this is different from the ``numpy`` behavior where a comparison can
+be broadcast:
+
+.. ipython:: python
+
+   np.array([1, 2, 3]) == np.array([1])
+
+or it can return ``False`` if broadcasting cannot be done:
+
+.. ipython:: python
+
+   np.array([1, 2, 3]) == np.array([1, 2])
+
 .. _whatsnew_0170.api_breaking.other:
 
 Other API Changes
@@ -149,3 +219,4 @@ Bug Fixes
 
 - Bug in ``Series.plot(kind='hist')`` Y Label not informative (:issue:`10485`)
 
+- Bug in the equality operator on ``Index`` not being consistent with ``Series`` (:issue:`9947`)
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -2593,6 +2593,9 @@ def _add_comparison_methods(cls):
         def _make_compare(op):
 
             def _evaluate_compare(self, other):
+                if isinstance(other, (np.ndarray, Index, ABCSeries)):
+                    if other.ndim > 0 and len(self) != len(other):
+                        raise ValueError('Lengths must match to compare')
                 func = getattr(self.values, op)
                 result = func(np.asarray(other))
 

diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
@@ -1550,22 +1550,70 @@ def test_groupby(self):
         tm.assert_dict_equal(groups, exp)
 
     def test_equals_op(self):
-        # For issue #9785
+        # GH9947
         index_a = Index(['foo', 'bar', 'baz'])
         index_b = Index(['foo', 'bar', 'baz', 'qux'])
-        # Testing Numpy Results Equivelent
-        assert_array_equal(
-            index_a.equals(index_a),
-            index_a == index_a
-        )
-        assert_array_equal(
-            index_a.equals(index_b),
-            index_a == index_b,
-        )
-        assert_array_equal(
-            index_b.equals(index_a),
-            index_b == index_a,
-        )
+        index_c = Index(['foo', 'bar', 'qux'])
+        index_d = Index(['foo'])
+        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
+            index_a == index_b
+        assert_array_equal(index_a == index_a, np.array([True, True, True]))
+        assert_array_equal(index_a == index_c, np.array([True, True, False]))
+
+        # test comparisons with numpy arrays
+        array_a = np.array(['foo', 'bar', 'baz'])
+        array_b = np.array(['foo', 'bar', 'baz', 'qux'])
+        array_c = np.array(['foo', 'bar', 'qux'])
+        array_d = np.array(['foo'])
+        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
+            index_a == array_b
+        assert_array_equal(index_a == array_a, np.array([True, True, True]))
+        assert_array_equal(index_a == array_c, np.array([True, True, False]))
+
+        # test comparisons with Series
+        series_a = Series(['foo', 'bar', 'baz'])
+        series_b = Series(['foo', 'bar', 'baz', 'qux'])
+        series_c = Series(['foo', 'bar', 'qux'])
+        series_d = Series(['foo'])
+        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
+            index_a == series_b
+        assert_array_equal(index_a == series_a, np.array([True, True, True]))
+        assert_array_equal(index_a == series_c, np.array([True, True, False]))
+
+        # cases where length is 1 for one of them
+        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
+            index_a == index_d
+        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
+            index_a == series_d
+        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
+            index_a == array_d
+        with tm.assertRaisesRegexp(ValueError, "Series lengths must match"):
+            series_a == series_d
+        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
+            series_a == array_d
+
+        # comparing with scalar should broadcast
+        assert_array_equal(index_a == 'foo', np.array([True, False, False]))
+        assert_array_equal(series_a == 'foo', np.array([True, False, False]))
+        assert_array_equal(array_a == 'foo', np.array([True, False, False]))
+
+        # GH9785
+        # test comparisons of multiindex
+        from pandas.compat import StringIO
+        df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1])
+        assert_array_equal(df.index == df.index, np.array([True, True]))
+
+        mi1 = MultiIndex.from_tuples([(1, 2), (4, 5)])
+        assert_array_equal(df.index == mi1, np.array([True, True]))
+        mi2 = MultiIndex.from_tuples([(1, 2), (4, 6)])
+        assert_array_equal(df.index == mi2, np.array([True, False]))
+        mi3 = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)])
+        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
+            df.index == mi3
+        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
+            df.index == index_a
+        assert_array_equal(index_a == mi3, np.array([False, False, False]))
+
 
 class TestCategoricalIndex(Base, tm.TestCase):
     _holder = CategoricalIndex
@@ -4815,47 +4863,9 @@ def test_index_name_retained(self):
         tm.assert_frame_equal(result, df_expected)
 
     def test_equals_operator(self):
-        # For issue #9785
+        # GH9785
         self.assertTrue((self.index == self.index).all())
 
-    def test_index_compare(self):
-        # For issue #9785
-        index_unequal = Index(['foo', 'bar', 'baz'])
-        index_equal = Index([
-            ('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
-            ('baz', 'two'), ('qux', 'one'), ('qux', 'two')
-        ], tupleize_cols=False)
-        # Testing Numpy Results Equivelent
-        assert_array_equal(
-            index_unequal.equals(self.index),
-            index_unequal == self.index,
-            err_msg = 'Index compared with MultiIndex failed',
-        )
-        assert_array_equal(
-            self.index.equals(index_unequal),
-            self.index == index_unequal,
-            err_msg = 'MultiIndex compared with Index failed',
-        )
-        assert_array_equal(
-            self.index.equals(index_equal),
-            self.index == index_equal,
-            err_msg = 'MultiIndex compared with Similar Index failed',
-        )
-        assert_array_equal(
-            index_equal.equals(self.index),
-            index_equal == self.index,
-            err_msg = 'Index compared with Similar MultiIndex failed',
-        )
-        # Testing that the result is true for the index_equal case
-        self.assertTrue(
-            (self.index == index_equal).all(),
-            msg='Assert Index compared with Similar MultiIndex match'
-        )
-        self.assertTrue(
-            (index_equal == self.index).all(),
-            msg='Assert MultiIndex compared with Similar Index match'
-        )
-
 
 def test_get_combined_index():
     from pandas.core.index import _get_combined_index