-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
PERF: block-wise arithmetic for frame-with-frame #32779
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 51 commits
1697252
a7764d6
30a836d
3559698
4334353
cb40b0c
713a776
95ef3ad
61e5cd6
89c3d7b
e348e46
519c757
2b1ba18
53e93fc
91c86a3
2034084
8aedf35
0c12d35
9727562
6661dd3
42bbbf3
65ab023
0d958a3
7f91e74
56eef51
4baea6f
41a4e7a
b23144e
7f24d57
ae744b7
b14a98c
f42c403
8a2807e
fa046f0
fd10fb6
7ea5d3a
7150e87
bddfbb0
25f83d6
2142d29
0ca2125
1ea0cc0
2bfc308
d5ad2a0
108004b
7ca5f9a
e78570d
33dfbdf
30f655b
65a4eaf
30d6580
fe21f9c
f86deb4
f3dc465
b57f52c
0c46531
463a145
32e70d8
7989251
455e45e
41e8e78
ac8eea8
8c4f951
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1233,12 +1233,21 @@ def reindex_indexer( | |
|
||
return type(self).from_blocks(new_blocks, new_axes) | ||
|
||
def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): | ||
def _slice_take_blocks_ax0( | ||
self, slice_or_indexer, fill_value=lib.no_default, only_slice: bool = False | ||
): | ||
""" | ||
Slice/take blocks along axis=0. | ||
|
||
Overloaded for SingleBlock | ||
|
||
Parameters | ||
---------- | ||
slice_or_indexer : slice, ndarray[bool], or list-like of ints | ||
fill_value : scalar, default lib.no_default | ||
only_slice : bool, default False | ||
If True, we always return views on existing arrays, never copies. | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
Returns | ||
------- | ||
new_blocks : list of Block | ||
|
@@ -1309,11 +1318,25 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): | |
blocks.append(newblk) | ||
|
||
else: | ||
blocks.append( | ||
blk.take_nd( | ||
blklocs[mgr_locs.indexer], axis=0, new_mgr_locs=mgr_locs, | ||
) | ||
) | ||
# GH#32779 to avoid the performance penalty of copying, | ||
# we may try to only slice | ||
taker = blklocs[mgr_locs.indexer] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add some comments here about the perf implications of these strategies |
||
max_len = max(len(mgr_locs), taker.max() + 1) | ||
if only_slice: | ||
taker = lib.maybe_indices_to_slice(taker, max_len) | ||
|
||
if isinstance(taker, slice): | ||
nb = blk.getitem_block(taker, new_mgr_locs=mgr_locs) | ||
blocks.append(nb) | ||
elif only_slice: | ||
# GH#33597 slice instead of take, so we get | ||
# views instead of copies | ||
for i, ml in zip(taker, mgr_locs): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you use a list comprehesnion There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. above on 1317 i can, here it is less clean |
||
nb = blk.getitem_block([i], new_mgr_locs=ml) | ||
blocks.append(nb) | ||
else: | ||
nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs) | ||
blocks.append(nb) | ||
|
||
return blocks | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
logical_op, | ||
) | ||
from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401 | ||
from pandas.core.ops.blockwise import operate_blockwise | ||
from pandas.core.ops.common import unpack_zerodim_and_defer | ||
from pandas.core.ops.dispatch import should_series_dispatch | ||
from pandas.core.ops.docstrings import ( | ||
|
@@ -325,8 +326,9 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): | |
elif isinstance(right, ABCDataFrame): | ||
assert right._indexed_same(left) | ||
|
||
def column_op(a, b): | ||
return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. An alternative would be to improve the performance of this line, and avoid the complexity that is added in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. regardless this is worth doing on L343/424 and L348/429 |
||
array_op = get_array_op(func, str_rep=str_rep) | ||
bm = operate_blockwise(left, right, array_op) | ||
return type(left)(bm) | ||
|
||
elif isinstance(right, ABCSeries) and axis == "columns": | ||
# We only get here if called via _combine_series_frame, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
from typing import TYPE_CHECKING, List, Tuple | ||
|
||
import numpy as np | ||
|
||
from pandas._typing import ArrayLike | ||
|
||
if TYPE_CHECKING: | ||
from pandas.core.internals.blocks import Block # noqa:F401 | ||
|
||
|
||
def operate_blockwise(left, right, array_op): | ||
assert right._indexed_same(left) | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def get_same_shape_values( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be moved out to a function in the module; this is complex (partialy) because of this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated |
||
lblk: "Block", rblk: "Block", left_ea: bool, right_ea: bool | ||
) -> Tuple[ArrayLike, ArrayLike]: | ||
""" | ||
Slice lblk.values to align with rblk. Squeeze if we have EAs. | ||
""" | ||
lvals = lblk.values | ||
rvals = rblk.values | ||
|
||
# TODO(EA2D): with 2D EAs pnly this first clause would be needed | ||
if not (left_ea or right_ea): | ||
lvals = lvals[rblk.mgr_locs.indexer, :] | ||
assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) | ||
elif left_ea and right_ea: | ||
assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) | ||
elif right_ea: | ||
# lvals are 2D, rvals are 1D | ||
lvals = lvals[rblk.mgr_locs.indexer, :] | ||
assert lvals.shape[0] == 1, lvals.shape | ||
lvals = lvals[0, :] | ||
else: | ||
# lvals are 1D, rvals are 2D | ||
assert rvals.shape[0] == 1, rvals.shape | ||
rvals = rvals[0, :] | ||
|
||
return lvals, rvals | ||
|
||
res_blks: List["Block"] = [] | ||
rmgr = right._mgr | ||
for n, blk in enumerate(left._mgr.blocks): | ||
locs = blk.mgr_locs | ||
blk_vals = blk.values | ||
|
||
left_ea = not isinstance(blk_vals, np.ndarray) | ||
|
||
rblks = rmgr._slice_take_blocks_ax0(locs.indexer, only_slice=True) | ||
|
||
# Assertions are disabled for performance, but should hold: | ||
# if left_ea: | ||
# assert len(locs) == 1, locs | ||
# assert len(rblks) == 1, rblks | ||
# assert rblks[0].shape[0] == 1, rblks[0].shape | ||
|
||
for k, rblk in enumerate(rblks): | ||
right_ea = not isinstance(rblk.values, np.ndarray) | ||
|
||
lvals, rvals = get_same_shape_values(blk, rblk, left_ea, right_ea) | ||
|
||
res_values = array_op(lvals, rvals) | ||
if left_ea and not right_ea and hasattr(res_values, "reshape"): | ||
res_values = res_values.reshape(1, -1) | ||
nbs = rblk._split_op_result(res_values) | ||
|
||
# Assertions are disabled for performance, but should hold: | ||
# if right_ea or left_ea: | ||
# assert len(nbs) == 1 | ||
# else: | ||
# assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) | ||
|
||
for nb in nbs: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be in a function again this is way too complex to grok There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you do this here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure |
||
# Reset mgr_locs to correspond to our original DataFrame | ||
nblocs = locs.as_array[nb.mgr_locs.indexer] | ||
nb.mgr_locs = nblocs | ||
# Assertions are disabled for performance, but should hold: | ||
# assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) | ||
# assert all(x in locs.as_array for x in nb.mgr_locs.as_array) | ||
|
||
res_blks.extend(nbs) | ||
|
||
# Assertions are disabled for performance, but should hold: | ||
# slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array} | ||
# nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks) | ||
# assert nlocs == len(left.columns), (nlocs, len(left.columns)) | ||
# assert len(slocs) == nlocs, (len(slocs), nlocs) | ||
# assert slocs == set(range(nlocs)), slocs | ||
|
||
new_mgr = type(rmgr)(res_blks, axes=rmgr.axes, do_integrity_check=False) | ||
return new_mgr |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -964,7 +964,9 @@ def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): | |
obj = tm.box_expected(dti, box_with_array) | ||
expected = tm.box_expected(expected, box_with_array) | ||
|
||
warn = PerformanceWarning if box_with_array is not pd.DataFrame else None | ||
warn = None | ||
if box_with_array is not pd.DataFrame or tz_naive_fixture is None: | ||
warn = PerformanceWarning | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know why this changed? We do / do not raise a warning on the array-level? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't do a warning when operating |
||
with tm.assert_produces_warning(warn): | ||
result = obj - obj.astype(object) | ||
tm.assert_equal(result, expected) | ||
|
@@ -1444,8 +1446,7 @@ def test_dt64arr_add_mixed_offset_array(self, box_with_array): | |
s = DatetimeIndex([Timestamp("2000-1-1"), Timestamp("2000-2-1")]) | ||
s = tm.box_expected(s, box_with_array) | ||
|
||
warn = None if box_with_array is pd.DataFrame else PerformanceWarning | ||
with tm.assert_produces_warning(warn): | ||
with tm.assert_produces_warning(PerformanceWarning): | ||
other = pd.Index([pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()]) | ||
other = tm.box_expected(other, box_with_array) | ||
result = s + other | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it needed to test it with all those different ops?
I think what we are interested in catching with the benchmark, is the general code handling wide dataframes (how the dispatching to the actual op is done, dispathing to block/column instead of series, etc), not the actual op right? So for those aspects, they all use the same code, and testing all ops seems overkill? (it just adds to the number of benchmarks needlessly, making it harder to run the benchmark suite / interpret the results)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No strong opinion here.
Yah, this is a hassle. Best guess is eventually we're going to land on a "throw hardware at the problem" solution, but that doesn't help the local-running troubles.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then let's keep only 2 of them or so, I would say
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how about four: one comparison, one logical, floordiv (for which we have special handling), and one other arithmetic