diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 856a1823353d5..b74de3ce0d696 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -54,7 +54,7 @@ ) if TYPE_CHECKING: - from pandas import DataFrame # noqa:F401 + from pandas import DataFrame, Series # noqa:F401 # ----------------------------------------------------------------------------- # constants @@ -467,19 +467,7 @@ def _combine_series_frame(left, right, func, axis: int): # We assume that self.align(other, ...) has already been called rvalues = right._values - if isinstance(rvalues, np.ndarray): - # TODO(EA2D): no need to special-case with 2D EAs - # We can operate block-wise - if axis == 0: - rvalues = rvalues.reshape(-1, 1) - else: - rvalues = rvalues.reshape(1, -1) - - rvalues = np.broadcast_to(rvalues, left.shape) - - array_op = get_array_op(func) - bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"]) - return type(left)(bm) + assert not isinstance(rvalues, np.ndarray) # handled by align_series_as_frame if axis == 0: new_data = dispatch_to_series(left, right, func) @@ -575,6 +563,7 @@ def to_series(right): left, right = left.align( right, join="outer", axis=axis, level=level, copy=False ) + right = _maybe_align_series_as_frame(left, right, axis) return left, right @@ -635,6 +624,25 @@ def _frame_arith_method_with_reindex( return result.reindex(join_columns, axis=1) +def _maybe_align_series_as_frame(frame: "DataFrame", series: "Series", axis: int): + """ + If the Series operand is not EA-dtype, we can broadcast to 2D and operate + blockwise. + """ + rvalues = series._values + if not isinstance(rvalues, np.ndarray): + # TODO(EA2D): no need to special-case with 2D EAs + return series + + if axis == 0: + rvalues = rvalues.reshape(-1, 1) + else: + rvalues = rvalues.reshape(1, -1) + + rvalues = np.broadcast_to(rvalues, frame.shape) + return type(frame)(rvalues, index=frame.index, columns=frame.columns) + + def _arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): # This is the only function where `special` can be either True or False op_name = _get_op_name(op, special) @@ -656,6 +664,11 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): ): return _frame_arith_method_with_reindex(self, other, op) + if isinstance(other, ABCSeries) and fill_value is not None: + # TODO: We could allow this in cases where we end up going + # through the DataFrame path + raise NotImplementedError(f"fill_value {fill_value} not supported.") + # TODO: why are we passing flex=True instead of flex=not special? # 15 tests fail if we pass flex=not special instead self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) @@ -665,9 +678,6 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): new_data = self._combine_frame(other, na_op, fill_value) elif isinstance(other, ABCSeries): - if fill_value is not None: - raise NotImplementedError(f"fill_value {fill_value} not supported.") - axis = self._get_axis_number(axis) if axis is not None else 1 new_data = _combine_series_frame(self, other, op, axis=axis) else: diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 1fec059f11df5..f94408d657ae5 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1470,8 +1470,6 @@ def test_td64arr_add_sub_object_array(self, box_with_array): [pd.Timedelta(days=2), pd.Timedelta(days=4), pd.Timestamp("2000-01-07")] ) expected = tm.box_expected(expected, box_with_array) - if box_with_array is pd.DataFrame: - expected = expected.astype(object) tm.assert_equal(result, expected) msg = "unsupported operand type|cannot subtract a datelike" @@ -1486,8 +1484,6 @@ def test_td64arr_add_sub_object_array(self, box_with_array): [pd.Timedelta(0), pd.Timedelta(0), pd.Timestamp("2000-01-01")] ) expected = tm.box_expected(expected, box_with_array) - if box_with_array is pd.DataFrame: - expected = expected.astype(object) tm.assert_equal(result, expected) @@ -2012,7 +2008,7 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype) tm.assert_equal(result, expected) pattern = ( - "true_divide cannot use operands|" + "true_divide'? cannot use operands|" "cannot perform __div__|" "cannot perform __truediv__|" "unsupported operand|" diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 5cb27c697a64d..b8ca5f16e4060 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -339,6 +339,20 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): result = getattr(empty, opname)(const).dtypes.value_counts() tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)])) + def test_df_flex_cmp_ea_dtype_with_ndarray_series(self): + ii = pd.IntervalIndex.from_breaks([1, 2, 3]) + df = pd.DataFrame({"A": ii, "B": ii}) + + ser = pd.Series([0, 0]) + res = df.eq(ser, axis=0) + + expected = pd.DataFrame({"A": [False, False], "B": [False, False]}) + tm.assert_frame_equal(res, expected) + + ser2 = pd.Series([1, 2], index=["A", "B"]) + res2 = df.eq(ser2, axis=1) + tm.assert_frame_equal(res2, expected) + # ------------------------------------------------------------------- # Arithmetic @@ -1410,12 +1424,13 @@ def test_alignment_non_pandas(self): range(1, 4), ]: - tm.assert_series_equal( - align(df, val, "index")[1], Series([1, 2, 3], index=df.index) - ) - tm.assert_series_equal( - align(df, val, "columns")[1], Series([1, 2, 3], index=df.columns) + expected = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index) + tm.assert_frame_equal(align(df, val, "index")[1], expected) + + expected = DataFrame( + {"X": [1, 1, 1], "Y": [2, 2, 2], "Z": [3, 3, 3]}, index=df.index ) + tm.assert_frame_equal(align(df, val, "columns")[1], expected) # length mismatch msg = "Unable to coerce to Series, length must be 3: given 2" @@ -1484,3 +1499,18 @@ def test_pow_nan_with_zero(): result = left["A"] ** right["A"] tm.assert_series_equal(result, expected["A"]) + + +def test_dataframe_series_extension_dtypes(): + # https://github.com/pandas-dev/pandas/issues/34311 + df = pd.DataFrame(np.random.randint(0, 100, (10, 3)), columns=["a", "b", "c"]) + ser = pd.Series([1, 2, 3], index=["a", "b", "c"]) + + expected = df.to_numpy("int64") + ser.to_numpy("int64").reshape(-1, 3) + expected = pd.DataFrame(expected, columns=df.columns, dtype="Int64") + + df_ea = df.astype("Int64") + result = df_ea + ser + tm.assert_frame_equal(result, expected) + result = df_ea + ser.astype("Int64") + tm.assert_frame_equal(result, expected)