pandas-dev · jreback · Oct 12, 2013 · Oct 12, 2013
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -591,6 +591,8 @@ Bug Fixes
     (:issue:`5123`, :issue:`5125`)
   - Allow duplicate indices when performing operations that align (:issue:`5185`)
   - Compound dtypes in a constructor raise ``NotImplementedError`` (:issue:`5191`)
+  - Bug in comparing frames with duplicate column labels (related to :issue:`4421`)
+  - Bug in ``describe`` on frames with duplicate column labels
 
 pandas 0.12.0
 -------------

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2848,28 +2848,34 @@ def _combine_const(self, other, func, raise_on_error=True):
         new_data = self._data.eval(func, other, raise_on_error=raise_on_error)
         return self._constructor(new_data)
 
+
+    def _compare_frame_evaluate(self, other, func, str_rep):
+
+        # unique
+        if self.columns.is_unique:
+            def _compare(a, b):
+                return dict([(col, func(a[col], b[col])) for col in a.columns])
+            new_data = expressions.evaluate(_compare, str_rep, self, other)
+            return self._constructor(data=new_data, index=self.index,
+                                     columns=self.columns, copy=False)
+        # non-unique
+        else:
+            def _compare(a, b):
+                return [func(a.iloc[:,i], b.iloc[:,i]) for i, col in enumerate(a.columns)]
+            new_data = expressions.evaluate(_compare, str_rep, self, other)
+            return self._constructor(data=new_data, index=self.columns,
+                                     columns=self.index, copy=False).T
+
     def _compare_frame(self, other, func, str_rep):
         if not self._indexed_same(other):
             raise ValueError('Can only compare identically-labeled '
                             'DataFrame objects')
-
-        def _compare(a, b):
-            return dict([(col, func(a[col], b[col])) for col in a.columns])
-        new_data = expressions.evaluate(_compare, str_rep, self, other)
-
-        return self._constructor(data=new_data, index=self.index,
-                                 columns=self.columns, copy=False)
+        return self._compare_frame_evaluate(other, func, str_rep)
 
     def _flex_compare_frame(self, other, func, str_rep, level):
         if not self._indexed_same(other):
             self, other = self.align(other, 'outer', level=level)
-
-        def _compare(a, b):
-            return dict([(col, func(a[col], b[col])) for col in a.columns])
-        new_data = expressions.evaluate(_compare, str_rep, self, other)
-
-        return self._constructor(data=new_data, index=self.index,
-                                 columns=self.columns, copy=False)
+        return self._compare_frame_evaluate(other, func, str_rep)
 
     def combine(self, other, func, fill_value=None, overwrite=True):
         """
@@ -3792,8 +3798,8 @@ def pretty_name(x):
 
         destat = []
 
-        for column in numdata.columns:
-            series = self[column]
+        for i in range(len(numdata.columns)):
+            series = numdata.iloc[:,i]
             destat.append([series.count(), series.mean(), series.std(),
                            series.min(), series.quantile(lb), series.median(),
                            series.quantile(ub), series.max()])

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -3179,6 +3179,14 @@ def check(result, expected=None):
         expected = DataFrame([[1],[1],[1]],columns=['bar'])
         check(df,expected)
 
+        # describe
+        df = DataFrame([[1,1,1],[2,2,2],[3,3,3]],columns=['bar','a','a'],dtype='float64')
+        result = df.describe()
+        s = df.iloc[:,0].describe()
+        expected = pd.concat([ s, s, s],keys=df.columns,axis=1)
+        check(result,expected)
+
+
     def test_column_dups_indexing(self):
 
         def check(result, expected=None):
@@ -3217,6 +3225,18 @@ def check(result, expected=None):
         result = df1.sub(df2)
         assert_frame_equal(result,expected)
 
+        # equality
+        df1 = DataFrame([[1,2],[2,np.nan],[3,4],[4,4]],columns=['A','B'])
+        df2 = DataFrame([[0,1],[2,4],[2,np.nan],[4,5]],columns=['A','A'])
+
+        # not-comparing like-labelled
+        self.assertRaises(ValueError, lambda : df1 == df2)
+
+        df1r = df1.reindex_like(df2)
+        result = df1r == df2
+        expected = DataFrame([[False,True],[True,False],[False,False],[True,False]],columns=['A','A'])
+        assert_frame_equal(result,expected)
+
     def test_insert_benchmark(self):
         # from the vb_suite/frame_methods/frame_insert_columns
         N = 10