Skip to content

Commit c4f8c54

Browse files
committed
PERF: perf regression with mixed-type ops using numexpr (GH5481)
BUG: non-unique ops not aligning correctly
1 parent 9e2c3d6 commit c4f8c54

File tree

2 files changed

+36
-8
lines changed

2 files changed

+36
-8
lines changed

pandas/core/frame.py

+27-8
Original file line number | Diff line number | Diff line change
@@ -2810,11 +2810,28 @@ def _arith_op(left, right):
28102810
return func(left, right)
28112811

28122812
if this._is_mixed_type or other._is_mixed_type:
2813-
# XXX no good for duplicate columns
2814-
# but cannot outer join in align if dups anyways?
2815-
result = {}
2816-
for col in this:
2817-
result[col] = _arith_op(this[col].values, other[col].values)
2813+
2814+
# unique
2815+
if this.columns.is_unique:
2816+
2817+
def f(col):
2818+
r = _arith_op(this[col].values, other[col].values)
2819+
return self._constructor_sliced(r,index=new_index,dtype=r.dtype)
2820+
2821+
result = dict([ (col, f(col)) for col in this ])
2822+
2823+
# non-unique
2824+
else:
2825+
2826+
def f(i):
2827+
r = _arith_op(this.iloc[:,i].values, other.iloc[:,i].values)
2828+
return self._constructor_sliced(r,index=new_index,dtype=r.dtype)
2829+
2830+
result = dict([ (i,f(i)) for i, col in enumerate(this.columns) ])
2831+
result = self._constructor(result, index=new_index, copy=False)
2832+
result.columns = new_columns
2833+
return result
2834+
28182835
else:
28192836
result = _arith_op(this.values, other.values)
28202837

@@ -2890,10 +2907,12 @@ def _compare(a, b):
28902907
# non-unique
28912908
else:
28922909
def _compare(a, b):
2893-
return [func(a.iloc[:,i], b.iloc[:,i]) for i, col in enumerate(a.columns)]
2910+
return dict([(i,func(a.iloc[:,i], b.iloc[:,i])) for i, col in enumerate(a.columns)])
28942911
new_data = expressions.evaluate(_compare, str_rep, self, other)
2895-
return self._constructor(data=new_data, index=self.columns,
2896-
columns=self.index, copy=False).T
2912+
result = self._constructor(data=new_data, index=self.index,
2913+
copy=False)
2914+
result.columns = self.columns
2915+
return result
28972916

28982917
def _compare_frame(self, other, func, str_rep):
28992918
if not self._indexed_same(other):

pandas/tests/test_frame.py

+9
Original file line number | Diff line number | Diff line change
@@ -3217,6 +3217,15 @@ def check(result, expected=None):
32173217
this_df['A'] = index
32183218
check(this_df, expected_df)
32193219

3220+
# operations
3221+
for op in ['__add__','__mul__','__sub__','__truediv__']:
3222+
df = DataFrame(dict(A = np.arange(10), B = np.random.rand(10)))
3223+
expected = getattr(df,op)(df)
3224+
expected.columns = ['A','A']
3225+
df.columns = ['A','A']
3226+
result = getattr(df,op)(df)
3227+
check(result,expected)
3228+
32203229
def test_column_dups_indexing(self):
32213230

32223231
def check(result, expected=None):

0 commit comments

Comments
 (0)