Skip to content

Commit 16a6e9c

Browse files
jbrockmendel authored and jreback committed
[PERF] use numexpr in dispatch_to_series (#22284)
1 parent aeb9299 commit 16a6e9c

File tree

2 files changed

+38
-27
lines changed

2 files changed

+38
-27
lines changed

pandas/core/frame.py

+12-19
Original file line number | Diff line number | Diff line change
@@ -4837,15 +4837,23 @@ def _arith_op(left, right):
48374837
copy=False)
48384838

48394839
def _combine_match_index(self, other, func, level=None):
4840+
assert isinstance(other, Series)
48404841
left, right = self.align(other, join='outer', axis=0, level=level,
48414842
copy=False)
48424843
assert left.index.equals(right.index)
4843-
new_data = func(left.values.T, right.values).T
4844-
return self._constructor(new_data,
4845-
index=left.index, columns=self.columns,
4846-
copy=False)
4844+
4845+
if left._is_mixed_type or right._is_mixed_type:
4846+
# operate column-wise; avoid costly object-casting in `.values`
4847+
return ops.dispatch_to_series(left, right, func)
4848+
else:
4849+
# fastpath --> operate directly on values
4850+
new_data = func(left.values.T, right.values).T
4851+
return self._constructor(new_data,
4852+
index=left.index, columns=self.columns,
4853+
copy=False)
48474854

48484855
def _combine_match_columns(self, other, func, level=None, try_cast=True):
4856+
assert isinstance(other, Series)
48494857
left, right = self.align(other, join='outer', axis=1, level=level,
48504858
copy=False)
48514859
assert left.columns.equals(right.index)
@@ -4864,21 +4872,6 @@ def _combine_const(self, other, func, errors='raise', try_cast=True):
48644872
try_cast=try_cast)
48654873
return self._constructor(new_data)
48664874

4867-
def _compare_frame(self, other, func, str_rep):
4868-
# compare_frame assumes self._indexed_same(other)
4869-
4870-
import pandas.core.computation.expressions as expressions
4871-
4872-
def _compare(a, b):
4873-
return {i: func(a.iloc[:, i], b.iloc[:, i])
4874-
for i in range(len(a.columns))}
4875-
4876-
new_data = expressions.evaluate(_compare, str_rep, self, other)
4877-
result = self._constructor(data=new_data, index=self.index,
4878-
copy=False)
4879-
result.columns = self.columns
4880-
return result
4881-
48824875
def combine(self, other, func, fill_value=None, overwrite=True):
48834876
"""
48844877
Perform column-wise combine with another DataFrame based on a

pandas/core/ops.py

+26-8
Original file line number | Diff line number | Diff line change
@@ -1621,7 +1621,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
16211621
# -----------------------------------------------------------------------------
16221622
# DataFrame
16231623

1624-
def dispatch_to_series(left, right, func):
1624+
def dispatch_to_series(left, right, func, str_rep=None):
16251625
"""
16261626
Evaluate the frame operation func(left, right) by evaluating
16271627
column-by-column, dispatching to the Series implementation.
@@ -1631,24 +1631,42 @@ def dispatch_to_series(left, right, func):
16311631
left : DataFrame
16321632
right : scalar or DataFrame
16331633
func : arithmetic or comparison operator
1634+
str_rep : str or None, default None
16341635
16351636
Returns
16361637
-------
16371638
DataFrame
16381639
"""
16391640
# Note: we use iloc to access columns for compat with cases
16401641
# with non-unique columns.
1642+
import pandas.core.computation.expressions as expressions
1643+
16411644
right = lib.item_from_zerodim(right)
16421645
if lib.is_scalar(right):
1643-
new_data = {i: func(left.iloc[:, i], right)
1644-
for i in range(len(left.columns))}
1646+
1647+
def column_op(a, b):
1648+
return {i: func(a.iloc[:, i], b)
1649+
for i in range(len(a.columns))}
1650+
16451651
elif isinstance(right, ABCDataFrame):
16461652
assert right._indexed_same(left)
1647-
new_data = {i: func(left.iloc[:, i], right.iloc[:, i])
1648-
for i in range(len(left.columns))}
1653+
1654+
def column_op(a, b):
1655+
return {i: func(a.iloc[:, i], b.iloc[:, i])
1656+
for i in range(len(a.columns))}
1657+
1658+
elif isinstance(right, ABCSeries):
1659+
assert right.index.equals(left.index) # Handle other cases later
1660+
1661+
def column_op(a, b):
1662+
return {i: func(a.iloc[:, i], b)
1663+
for i in range(len(a.columns))}
1664+
16491665
else:
16501666
# Remaining cases have less-obvious dispatch rules
1651-
raise NotImplementedError
1667+
raise NotImplementedError(right)
1668+
1669+
new_data = expressions.evaluate(column_op, str_rep, left, right)
16521670

16531671
result = left._constructor(new_data, index=left.index, copy=False)
16541672
# Pin columns instead of passing to constructor for compat with
@@ -1818,7 +1836,7 @@ def f(self, other, axis=default_axis, level=None):
18181836
if not self._indexed_same(other):
18191837
self, other = self.align(other, 'outer',
18201838
level=level, copy=False)
1821-
return self._compare_frame(other, na_op, str_rep)
1839+
return dispatch_to_series(self, other, na_op, str_rep)
18221840

18231841
elif isinstance(other, ABCSeries):
18241842
return _combine_series_frame(self, other, na_op,
@@ -1843,7 +1861,7 @@ def f(self, other):
18431861
if not self._indexed_same(other):
18441862
raise ValueError('Can only compare identically-labeled '
18451863
'DataFrame objects')
1846-
return self._compare_frame(other, func, str_rep)
1864+
return dispatch_to_series(self, other, func, str_rep)
18471865

18481866
elif isinstance(other, ABCSeries):
18491867
return _combine_series_frame(self, other, func,

0 commit comments

Comments
 (0)