Skip to content

Commit 29a524c

Browse files
committed
BUG: Bug in comparing frames with duplicate columns (related to GH4421)
BUG: Bug in describe on frames with duplicate columns
1 parent 1e51c27 commit 29a524c

File tree

3 files changed

+44
-16
lines changed

3 files changed

+44
-16
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,8 @@ Bug Fixes
591591
(:issue:`5123`, :issue:`5125`)
592592
- Allow duplicate indices when performing operations that align (:issue:`5185`)
593593
- Compound dtypes in a constructor raise ``NotImplementedError`` (:issue:`5191`)
594+
- Bug in comparing frames with duplicate column labels (related to :issue:`4421`)
595+
- Bug in ``describe`` on frames with duplicate column labels
594596

595597
pandas 0.12.0
596598
-------------

pandas/core/frame.py

+22-16
Original file line numberDiff line numberDiff line change
@@ -2848,28 +2848,34 @@ def _combine_const(self, other, func, raise_on_error=True):
28482848
new_data = self._data.eval(func, other, raise_on_error=raise_on_error)
28492849
return self._constructor(new_data)
28502850

2851+
2852+
def _compare_frame_evaluate(self, other, func, str_rep):
2853+
2854+
# unique
2855+
if self.columns.is_unique:
2856+
def _compare(a, b):
2857+
return dict([(col, func(a[col], b[col])) for col in a.columns])
2858+
new_data = expressions.evaluate(_compare, str_rep, self, other)
2859+
return self._constructor(data=new_data, index=self.index,
2860+
columns=self.columns, copy=False)
2861+
# non-unique
2862+
else:
2863+
def _compare(a, b):
2864+
return [func(a.iloc[:,i], b.iloc[:,i]) for i, col in enumerate(a.columns)]
2865+
new_data = expressions.evaluate(_compare, str_rep, self, other)
2866+
return self._constructor(data=new_data, index=self.columns,
2867+
columns=self.index, copy=False).T
2868+
28512869
def _compare_frame(self, other, func, str_rep):
28522870
if not self._indexed_same(other):
28532871
raise ValueError('Can only compare identically-labeled '
28542872
'DataFrame objects')
2855-
2856-
def _compare(a, b):
2857-
return dict([(col, func(a[col], b[col])) for col in a.columns])
2858-
new_data = expressions.evaluate(_compare, str_rep, self, other)
2859-
2860-
return self._constructor(data=new_data, index=self.index,
2861-
columns=self.columns, copy=False)
2873+
return self._compare_frame_evaluate(other, func, str_rep)
28622874

28632875
def _flex_compare_frame(self, other, func, str_rep, level):
28642876
if not self._indexed_same(other):
28652877
self, other = self.align(other, 'outer', level=level)
2866-
2867-
def _compare(a, b):
2868-
return dict([(col, func(a[col], b[col])) for col in a.columns])
2869-
new_data = expressions.evaluate(_compare, str_rep, self, other)
2870-
2871-
return self._constructor(data=new_data, index=self.index,
2872-
columns=self.columns, copy=False)
2878+
return self._compare_frame_evaluate(other, func, str_rep)
28732879

28742880
def combine(self, other, func, fill_value=None, overwrite=True):
28752881
"""
@@ -3792,8 +3798,8 @@ def pretty_name(x):
37923798

37933799
destat = []
37943800

3795-
for column in numdata.columns:
3796-
series = self[column]
3801+
for i in range(len(numdata.columns)):
3802+
series = numdata.iloc[:,i]
37973803
destat.append([series.count(), series.mean(), series.std(),
37983804
series.min(), series.quantile(lb), series.median(),
37993805
series.quantile(ub), series.max()])

pandas/tests/test_frame.py

+20
Original file line numberDiff line numberDiff line change
@@ -3179,6 +3179,14 @@ def check(result, expected=None):
31793179
expected = DataFrame([[1],[1],[1]],columns=['bar'])
31803180
check(df,expected)
31813181

3182+
# describe
3183+
df = DataFrame([[1,1,1],[2,2,2],[3,3,3]],columns=['bar','a','a'],dtype='float64')
3184+
result = df.describe()
3185+
s = df.iloc[:,0].describe()
3186+
expected = pd.concat([ s, s, s],keys=df.columns,axis=1)
3187+
check(result,expected)
3188+
3189+
31823190
def test_column_dups_indexing(self):
31833191

31843192
def check(result, expected=None):
@@ -3217,6 +3225,18 @@ def check(result, expected=None):
32173225
result = df1.sub(df2)
32183226
assert_frame_equal(result,expected)
32193227

3228+
# equality
3229+
df1 = DataFrame([[1,2],[2,np.nan],[3,4],[4,4]],columns=['A','B'])
3230+
df2 = DataFrame([[0,1],[2,4],[2,np.nan],[4,5]],columns=['A','A'])
3231+
3232+
# not-comparing like-labelled
3233+
self.assertRaises(ValueError, lambda : df1 == df2)
3234+
3235+
df1r = df1.reindex_like(df2)
3236+
result = df1r == df2
3237+
expected = DataFrame([[False,True],[True,False],[False,False],[True,False]],columns=['A','A'])
3238+
assert_frame_equal(result,expected)
3239+
32203240
def test_insert_benchmark(self):
32213241
# from the vb_suite/frame_methods/frame_insert_columns
32223242
N = 10

0 commit comments

Comments
 (0)