Skip to content

Commit 1faac78

Browse files
jbrockmendel authored and aeltanawy committed
[PERF] use numexpr in dispatch_to_series (pandas-dev#22284)
1 parent 0ac130d commit 1faac78

File tree

2 files changed

+38
-27
lines changed

2 files changed

+38
-27
lines changed

pandas/core/frame.py

+12 -19
Original file line number | Diff line number | Diff line change
@@ -4823,15 +4823,23 @@ def _arith_op(left, right):
48234823
copy=False)
48244824

48254825
def _combine_match_index(self, other, func, level=None):
4826+
assert isinstance(other, Series)
48264827
left, right = self.align(other, join='outer', axis=0, level=level,
48274828
copy=False)
48284829
assert left.index.equals(right.index)
4829-
new_data = func(left.values.T, right.values).T
4830-
return self._constructor(new_data,
4831-
index=left.index, columns=self.columns,
4832-
copy=False)
4830+
4831+
if left._is_mixed_type or right._is_mixed_type:
4832+
# operate column-wise; avoid costly object-casting in `.values`
4833+
return ops.dispatch_to_series(left, right, func)
4834+
else:
4835+
# fastpath --> operate directly on values
4836+
new_data = func(left.values.T, right.values).T
4837+
return self._constructor(new_data,
4838+
index=left.index, columns=self.columns,
4839+
copy=False)
48334840

48344841
def _combine_match_columns(self, other, func, level=None, try_cast=True):
4842+
assert isinstance(other, Series)
48354843
left, right = self.align(other, join='outer', axis=1, level=level,
48364844
copy=False)
48374845
assert left.columns.equals(right.index)
@@ -4850,21 +4858,6 @@ def _combine_const(self, other, func, errors='raise', try_cast=True):
48504858
try_cast=try_cast)
48514859
return self._constructor(new_data)
48524860

4853-
def _compare_frame(self, other, func, str_rep):
4854-
# compare_frame assumes self._indexed_same(other)
4855-
4856-
import pandas.core.computation.expressions as expressions
4857-
4858-
def _compare(a, b):
4859-
return {i: func(a.iloc[:, i], b.iloc[:, i])
4860-
for i in range(len(a.columns))}
4861-
4862-
new_data = expressions.evaluate(_compare, str_rep, self, other)
4863-
result = self._constructor(data=new_data, index=self.index,
4864-
copy=False)
4865-
result.columns = self.columns
4866-
return result
4867-
48684861
def combine(self, other, func, fill_value=None, overwrite=True):
48694862
"""
48704863
Perform column-wise combine with another DataFrame based on a

pandas/core/ops.py

+26 -8
Original file line number | Diff line number | Diff line change
@@ -1621,7 +1621,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
16211621
# -----------------------------------------------------------------------------
16221622
# DataFrame
16231623

1624-
def dispatch_to_series(left, right, func):
1624+
def dispatch_to_series(left, right, func, str_rep=None):
16251625
"""
16261626
Evaluate the frame operation func(left, right) by evaluating
16271627
column-by-column, dispatching to the Series implementation.
@@ -1631,24 +1631,42 @@ def dispatch_to_series(left, right, func):
16311631
left : DataFrame
16321632
right : scalar or DataFrame
16331633
func : arithmetic or comparison operator
1634+
str_rep : str or None, default None
16341635
16351636
Returns
16361637
-------
16371638
DataFrame
16381639
"""
16391640
# Note: we use iloc to access columns for compat with cases
16401641
# with non-unique columns.
1642+
import pandas.core.computation.expressions as expressions
1643+
16411644
right = lib.item_from_zerodim(right)
16421645
if lib.is_scalar(right):
1643-
new_data = {i: func(left.iloc[:, i], right)
1644-
for i in range(len(left.columns))}
1646+
1647+
def column_op(a, b):
1648+
return {i: func(a.iloc[:, i], b)
1649+
for i in range(len(a.columns))}
1650+
16451651
elif isinstance(right, ABCDataFrame):
16461652
assert right._indexed_same(left)
1647-
new_data = {i: func(left.iloc[:, i], right.iloc[:, i])
1648-
for i in range(len(left.columns))}
1653+
1654+
def column_op(a, b):
1655+
return {i: func(a.iloc[:, i], b.iloc[:, i])
1656+
for i in range(len(a.columns))}
1657+
1658+
elif isinstance(right, ABCSeries):
1659+
assert right.index.equals(left.index) # Handle other cases later
1660+
1661+
def column_op(a, b):
1662+
return {i: func(a.iloc[:, i], b)
1663+
for i in range(len(a.columns))}
1664+
16491665
else:
16501666
# Remaining cases have less-obvious dispatch rules
1651-
raise NotImplementedError
1667+
raise NotImplementedError(right)
1668+
1669+
new_data = expressions.evaluate(column_op, str_rep, left, right)
16521670

16531671
result = left._constructor(new_data, index=left.index, copy=False)
16541672
# Pin columns instead of passing to constructor for compat with
@@ -1818,7 +1836,7 @@ def f(self, other, axis=default_axis, level=None):
18181836
if not self._indexed_same(other):
18191837
self, other = self.align(other, 'outer',
18201838
level=level, copy=False)
1821-
return self._compare_frame(other, na_op, str_rep)
1839+
return dispatch_to_series(self, other, na_op, str_rep)
18221840

18231841
elif isinstance(other, ABCSeries):
18241842
return _combine_series_frame(self, other, na_op,
@@ -1843,7 +1861,7 @@ def f(self, other):
18431861
if not self._indexed_same(other):
18441862
raise ValueError('Can only compare identically-labeled '
18451863
'DataFrame objects')
1846-
return self._compare_frame(other, func, str_rep)
1864+
return dispatch_to_series(self, other, func, str_rep)
18471865

18481866
elif isinstance(other, ABCSeries):
18491867
return _combine_series_frame(self, other, func,

0 commit comments

Comments
 (0)