Skip to content

Commit 64859ec

Browse files
PERF: operate on arrays instead of Series in DataFrame/DataFrame ops (pandas-dev#33561)
1 parent 9b4ea44 commit 64859ec

File tree

1 file changed

+9
-17
lines changed

1 file changed

+9
-17
lines changed

pandas/core/ops/__init__.py

+9 -17
Original file line number | Diff line number | Diff line change
@@ -262,15 +262,11 @@ def dispatch_to_series(left, right, func, axis=None):
262262
-------
263263
DataFrame
264264
"""
265-
# Note: we use iloc to access columns for compat with cases
266-
# with non-unique columns.
267-
import pandas.core.computation.expressions as expressions
265+
# Get the appropriate array-op to apply to each column/block's values.
266+
array_op = get_array_op(func)
268267

269268
right = lib.item_from_zerodim(right)
270269
if lib.is_scalar(right) or np.ndim(right) == 0:
271-
272-
# Get the appropriate array-op to apply to each block's values.
273-
array_op = get_array_op(func)
274270
bm = left._mgr.apply(array_op, right=right)
275271
return type(left)(bm)
276272

@@ -281,7 +277,6 @@ def dispatch_to_series(left, right, func, axis=None):
281277
# fails in cases with empty columns reached via
282278
# _frame_arith_method_with_reindex
283279

284-
array_op = get_array_op(func)
285280
bm = left._mgr.operate_blockwise(right._mgr, array_op)
286281
return type(left)(bm)
287282

@@ -295,27 +290,24 @@ def dispatch_to_series(left, right, func, axis=None):
295290
# Note: we do not do this unconditionally as it may be lossy or
296291
# expensive for EA dtypes.
297292
right = np.asarray(right)
298-
299-
def column_op(a, b):
300-
return {i: func(a.iloc[:, i], b[i]) for i in range(len(a.columns))}
301-
302293
else:
294+
right = right._values
303295

304-
def column_op(a, b):
305-
return {i: func(a.iloc[:, i], b.iloc[i]) for i in range(len(a.columns))}
296+
arrays = [array_op(l, r) for l, r in zip(left._iter_column_arrays(), right)]
306297

307298
elif isinstance(right, ABCSeries):
308299
assert right.index.equals(left.index) # Handle other cases later
300+
right = right._values
309301

310-
def column_op(a, b):
311-
return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))}
302+
arrays = [array_op(l, right) for l in left._iter_column_arrays()]
312303

313304
else:
314305
# Remaining cases have less-obvious dispatch rules
315306
raise NotImplementedError(right)
316307

317-
new_data = expressions.evaluate(column_op, left, right)
318-
return new_data
308+
return type(left)._from_arrays(
309+
arrays, left.columns, left.index, verify_integrity=False
310+
)
319311

320312

321313
# -----------------------------------------------------------------------------

0 commit comments

Comments
 (0)