diff --git a/doc/source/release.rst b/doc/source/release.rst index 5001c5142f330..55f786d263a0a 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -591,6 +591,8 @@ Bug Fixes (:issue:`5123`, :issue:`5125`) - Allow duplicate indices when performing operations that align (:issue:`5185`) - Compound dtypes in a constructor raise ``NotImplementedError`` (:issue:`5191`) + - Bug in comparing frames with duplicate columns (related to :issue:`4421`) + - Bug in ``describe`` on frames with duplicate columns pandas 0.12.0 ------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 24a4e4800e750..967da6102ae1a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2848,28 +2848,34 @@ def _combine_const(self, other, func, raise_on_error=True): new_data = self._data.eval(func, other, raise_on_error=raise_on_error) return self._constructor(new_data) + + def _compare_frame_evaluate(self, other, func, str_rep): + + # unique + if self.columns.is_unique: + def _compare(a, b): + return dict([(col, func(a[col], b[col])) for col in a.columns]) + new_data = expressions.evaluate(_compare, str_rep, self, other) + return self._constructor(data=new_data, index=self.index, + columns=self.columns, copy=False) + # non-unique + else: + def _compare(a, b): + return [func(a.iloc[:,i], b.iloc[:,i]) for i, col in enumerate(a.columns)] + new_data = expressions.evaluate(_compare, str_rep, self, other) + return self._constructor(data=new_data, index=self.columns, + columns=self.index, copy=False).T + def _compare_frame(self, other, func, str_rep): if not self._indexed_same(other): raise ValueError('Can only compare identically-labeled ' 'DataFrame objects') - - def _compare(a, b): - return dict([(col, func(a[col], b[col])) for col in a.columns]) - new_data = expressions.evaluate(_compare, str_rep, self, other) - - return self._constructor(data=new_data, index=self.index, - columns=self.columns, copy=False) + return self._compare_frame_evaluate(other, func, str_rep) def 
_flex_compare_frame(self, other, func, str_rep, level): if not self._indexed_same(other): self, other = self.align(other, 'outer', level=level) - - def _compare(a, b): - return dict([(col, func(a[col], b[col])) for col in a.columns]) - new_data = expressions.evaluate(_compare, str_rep, self, other) - - return self._constructor(data=new_data, index=self.index, - columns=self.columns, copy=False) + return self._compare_frame_evaluate(other, func, str_rep) def combine(self, other, func, fill_value=None, overwrite=True): """ @@ -3792,8 +3798,8 @@ def pretty_name(x): destat = [] - for column in numdata.columns: - series = self[column] + for i in range(len(numdata.columns)): + series = numdata.iloc[:,i] destat.append([series.count(), series.mean(), series.std(), series.min(), series.quantile(lb), series.median(), series.quantile(ub), series.max()]) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 6e7683d29a934..fe0f9244c31a3 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3179,6 +3179,14 @@ def check(result, expected=None): expected = DataFrame([[1],[1],[1]],columns=['bar']) check(df,expected) + # describe + df = DataFrame([[1,1,1],[2,2,2],[3,3,3]],columns=['bar','a','a'],dtype='float64') + result = df.describe() + s = df.iloc[:,0].describe() + expected = pd.concat([ s, s, s],keys=df.columns,axis=1) + check(result,expected) + + def test_column_dups_indexing(self): def check(result, expected=None): @@ -3217,6 +3225,18 @@ def check(result, expected=None): result = df1.sub(df2) assert_frame_equal(result,expected) + # equality + df1 = DataFrame([[1,2],[2,np.nan],[3,4],[4,4]],columns=['A','B']) + df2 = DataFrame([[0,1],[2,4],[2,np.nan],[4,5]],columns=['A','A']) + + # not-comparing like-labelled + self.assertRaises(ValueError, lambda : df1 == df2) + + df1r = df1.reindex_like(df2) + result = df1r == df2 + expected = DataFrame([[False,True],[True,False],[False,False],[True,False]],columns=['A','A']) + 
assert_frame_equal(result,expected) + def test_insert_benchmark(self): # from the vb_suite/frame_methods/frame_insert_columns N = 10