Skip to content

REF: avoid broadcasting of Series to DataFrame in ops for ArrayManager #40482

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2 changes: 0 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6674,8 +6674,6 @@ def _dispatch_frame_op(self, right, func: Callable, axis: Optional[int] = None):
assert right.index.equals(self.columns)

right = right._values
# maybe_align_as_frame ensures we do not have an ndarray here
assert not isinstance(right, np.ndarray)

arrays = [
array_op(_left, _right)
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,11 @@ def to_series(right):
left, right = left.align(
right, join="outer", axis=axis, level=level, copy=False
)
right = _maybe_align_series_as_frame(left, right, axis)

from pandas.core.internals import BlockManager

if isinstance(left._mgr, BlockManager):
right = _maybe_align_series_as_frame(left, right, axis)

return left, right

Expand Down
23 changes: 18 additions & 5 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1506,7 +1506,13 @@ def test_dt64arr_add_sub_DateOffset(self, box_with_array):
@pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub])
@pytest.mark.parametrize("box_other", [True, False])
def test_dt64arr_add_sub_offset_array(
self, tz_naive_fixture, box_with_array, box_other, op, other
self,
tz_naive_fixture,
box_with_array,
box_other,
op,
other,
using_array_manager,
):
# GH#18849
# GH#10699 array of offsets
Expand All @@ -1522,7 +1528,10 @@ def test_dt64arr_add_sub_offset_array(
if box_other:
other = tm.box_expected(other, box_with_array)

with tm.assert_produces_warning(PerformanceWarning):
warn = PerformanceWarning
if box_with_array is pd.DataFrame and not box_other and using_array_manager:
warn = None
with tm.assert_produces_warning(warn):
res = op(dtarr, other)

tm.assert_equal(res, expected)
Expand Down Expand Up @@ -2451,7 +2460,7 @@ def test_dti_addsub_offset_arraylike(

@pytest.mark.parametrize("other_box", [pd.Index, np.array])
def test_dti_addsub_object_arraylike(
self, tz_naive_fixture, box_with_array, other_box
self, tz_naive_fixture, box_with_array, other_box, using_array_manager
):
tz = tz_naive_fixture

Expand All @@ -2463,14 +2472,18 @@ def test_dti_addsub_object_arraylike(
expected = DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture)
expected = tm.box_expected(expected, xbox)

with tm.assert_produces_warning(PerformanceWarning):
warn = PerformanceWarning
if box_with_array is pd.DataFrame and using_array_manager:
warn = None

with tm.assert_produces_warning(warn):
result = dtarr + other
tm.assert_equal(result, expected)

expected = DatetimeIndex(["2016-12-31", "2016-12-29"], tz=tz_naive_fixture)
expected = tm.box_expected(expected, xbox)

with tm.assert_produces_warning(PerformanceWarning):
with tm.assert_produces_warning(warn):
result = dtarr - other
tm.assert_equal(result, expected)

Expand Down
75 changes: 45 additions & 30 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1323,7 +1323,7 @@ def test_td64arr_sub_timedeltalike(self, two_hours, box_with_array):
# ------------------------------------------------------------------
# __add__/__sub__ with DateOffsets and arrays of DateOffsets

def test_td64arr_add_offset_index(self, names, box_with_array):
def test_td64arr_add_offset_index(self, names, box_with_array, using_array_manager):
# GH#18849, GH#19744
box = box_with_array
exname = get_expected_name(box, names)
Expand All @@ -1338,17 +1338,21 @@ def test_td64arr_add_offset_index(self, names, box_with_array):
tdi = tm.box_expected(tdi, box)
expected = tm.box_expected(expected, box)

with tm.assert_produces_warning(PerformanceWarning):
warn = PerformanceWarning
if box is DataFrame and using_array_manager:
warn = None

with tm.assert_produces_warning(warn):
res = tdi + other
tm.assert_equal(res, expected)

with tm.assert_produces_warning(PerformanceWarning):
with tm.assert_produces_warning(warn):
res2 = other + tdi
tm.assert_equal(res2, expected)

# TODO: combine with test_td64arr_add_offset_index by parametrizing
# over second box?
def test_td64arr_add_offset_array(self, box_with_array):
def test_td64arr_add_offset_array(self, box_with_array, using_array_manager):
# GH#18849
box = box_with_array
tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"])
Expand All @@ -1361,15 +1365,19 @@ def test_td64arr_add_offset_array(self, box_with_array):
tdi = tm.box_expected(tdi, box)
expected = tm.box_expected(expected, box)

with tm.assert_produces_warning(PerformanceWarning):
warn = PerformanceWarning
if box is DataFrame and using_array_manager:
warn = None

with tm.assert_produces_warning(warn):
res = tdi + other
tm.assert_equal(res, expected)

with tm.assert_produces_warning(PerformanceWarning):
with tm.assert_produces_warning(warn):
res2 = other + tdi
tm.assert_equal(res2, expected)

def test_td64arr_sub_offset_index(self, names, box_with_array):
def test_td64arr_sub_offset_index(self, names, box_with_array, using_array_manager):
# GH#18824, GH#19744
box = box_with_array
xbox = box if box not in [tm.to_array, pd.array] else pd.Index
Expand All @@ -1385,11 +1393,15 @@ def test_td64arr_sub_offset_index(self, names, box_with_array):
tdi = tm.box_expected(tdi, box)
expected = tm.box_expected(expected, xbox)

with tm.assert_produces_warning(PerformanceWarning):
warn = PerformanceWarning
if box is DataFrame and using_array_manager:
warn = None

with tm.assert_produces_warning(warn):
res = tdi - other
tm.assert_equal(res, expected)

def test_td64arr_sub_offset_array(self, box_with_array):
def test_td64arr_sub_offset_array(self, box_with_array, using_array_manager):
# GH#18824
tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"])
other = np.array([offsets.Hour(n=1), offsets.Minute(n=-2)])
Expand All @@ -1401,11 +1413,17 @@ def test_td64arr_sub_offset_array(self, box_with_array):
tdi = tm.box_expected(tdi, box_with_array)
expected = tm.box_expected(expected, box_with_array)

with tm.assert_produces_warning(PerformanceWarning):
warn = PerformanceWarning
if box_with_array is DataFrame and using_array_manager:
warn = None

with tm.assert_produces_warning(warn):
res = tdi - other
tm.assert_equal(res, expected)

def test_td64arr_with_offset_series(self, names, box_with_array):
def test_td64arr_with_offset_series(
self, names, box_with_array, using_array_manager
):
# GH#18849
box = box_with_array
box2 = Series if box in [pd.Index, tm.to_array, pd.array] else box
Expand All @@ -1418,18 +1436,22 @@ def test_td64arr_with_offset_series(self, names, box_with_array):
obj = tm.box_expected(tdi, box)
expected_add = tm.box_expected(expected_add, box2)

with tm.assert_produces_warning(PerformanceWarning):
warn = PerformanceWarning
if box is DataFrame and using_array_manager:
warn = None

with tm.assert_produces_warning(warn):
res = obj + other
tm.assert_equal(res, expected_add)

with tm.assert_produces_warning(PerformanceWarning):
with tm.assert_produces_warning(warn):
res2 = other + obj
tm.assert_equal(res2, expected_add)

expected_sub = Series([tdi[n] - other[n] for n in range(len(tdi))], name=exname)
expected_sub = tm.box_expected(expected_sub, box2)

with tm.assert_produces_warning(PerformanceWarning):
with tm.assert_produces_warning(warn):
res3 = obj - other
tm.assert_equal(res3, expected_sub)

Expand Down Expand Up @@ -1460,7 +1482,7 @@ def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box_with_array):
# ------------------------------------------------------------------
# Unsorted

def test_td64arr_add_sub_object_array(self, box_with_array):
def test_td64arr_add_sub_object_array(self, box_with_array, using_array_manager):
box = box_with_array
xbox = np.ndarray if box is pd.array else box

Expand All @@ -1469,7 +1491,11 @@ def test_td64arr_add_sub_object_array(self, box_with_array):

other = np.array([Timedelta(days=1), offsets.Day(2), Timestamp("2000-01-04")])

with tm.assert_produces_warning(PerformanceWarning):
warn = PerformanceWarning
if box is DataFrame and using_array_manager:
warn = None

with tm.assert_produces_warning(warn):
result = tdarr + other

expected = pd.Index(
Expand All @@ -1480,10 +1506,10 @@ def test_td64arr_add_sub_object_array(self, box_with_array):

msg = "unsupported operand type|cannot subtract a datelike"
with pytest.raises(TypeError, match=msg):
with tm.assert_produces_warning(PerformanceWarning):
with tm.assert_produces_warning(warn):
tdarr - other

with tm.assert_produces_warning(PerformanceWarning):
with tm.assert_produces_warning(warn):
result = other - tdarr

expected = pd.Index([Timedelta(0), Timedelta(0), Timestamp("2000-01-01")])
Expand Down Expand Up @@ -2051,9 +2077,7 @@ def test_td64arr_rmul_numeric_array(self, box_with_array, vector, any_real_dtype
[np.array([20, 30, 40]), pd.Index([20, 30, 40]), Series([20, 30, 40])],
ids=lambda x: type(x).__name__,
)
def test_td64arr_div_numeric_array(
self, box_with_array, vector, any_real_dtype, using_array_manager
):
def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype):
# GH#4521
# divide/multiply by integers
xbox = get_upcast_box(box_with_array, vector)
Expand Down Expand Up @@ -2089,15 +2113,6 @@ def test_td64arr_div_numeric_array(
expected = pd.Index(expected) # do dtype inference
expected = tm.box_expected(expected, xbox)
assert tm.get_dtype(expected) == "m8[ns]"

if using_array_manager and box_with_array is DataFrame:
# TODO the behaviour is buggy here (third column with all-NaT
# as result doesn't get preserved as timedelta64 dtype).
# Reported at https://github.com/pandas-dev/pandas/issues/39750
# Changing the expected instead of xfailing to continue to test
# the correct behaviour for the other columns
expected[2] = Series([NaT, NaT], dtype=object)

tm.assert_equal(result, expected)

with pytest.raises(TypeError, match=pattern):
Expand Down