From baf4e38a16062775550f50e8350ea9492696291a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 20 May 2020 08:10:35 -0700 Subject: [PATCH 1/8] test for axis=1 case --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/internals/blocks.py | 12 ++++++++++++ pandas/tests/frame/test_arithmetic.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a3499f857d158..98331be285dc0 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -686,6 +686,7 @@ Numeric - Bug in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`). - Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`) - Bug in :class:`DataFrame` and :class:`Series` addition and subtraction between object-dtype objects and ``datetime64`` dtype objects (:issue:`33824`) +- Bug in :class:`DataFrame` flex arithmetic methods with ExtensionDtypes (:issue:`????`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c052c6c9d7d1d..4e18409e3eb3c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -332,11 +332,23 @@ def apply(self, func, **kwargs) -> List["Block"]: apply the function to my values; return a block if we are not one """ + kwargs = self._ensure_same_shape_values(kwargs) + with np.errstate(all="ignore"): result = func(self.values, **kwargs) return self._split_op_result(result) + def _ensure_same_shape_values(self, kwargs): + # TODO(EA2D): kludge for arithmetic not needed with 2D EA + if self.ndim == 2 and self.values.ndim == 1: + if "right" in kwargs and isinstance(kwargs["right"], np.ndarray): + right = kwargs["right"] + if right.ndim == 2: + assert right.shape == (1, len(self.values)), right.shape + kwargs["right"] = right[0] + return kwargs + def _split_op_result(self, result) -> List["Block"]: # See also: split_and_operate if is_extension_array_dtype(result) and result.ndim > 1: diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 5cb27c697a64d..44869b6ace2d5 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -339,6 +339,20 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): result = getattr(empty, opname)(const).dtypes.value_counts() tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)])) + def test_df_flex_cmp_ea_dtype_with_ndarray_series(self): + ii = pd.IntervalIndex.from_breaks([1, 2, 3]) + df = pd.DataFrame({"A": ii, "B": ii}) + + ser = pd.Series([0, 0]) + res = df.eq(ser, axis=0) + + expected = pd.DataFrame({"A": [False, False], "B": [False, False]}) + tm.assert_frame_equal(res, expected) + + ser2 = pd.Series([1, 2], index=["A", "B"]) + res2 = df.eq(ser2, axis=1) + tm.assert_frame_equal(res2, expected) + # ------------------------------------------------------------------- # Arithmetic From ea3aabad05666efb32c33a8e221d315240a48b59 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 20 May 2020 09:06:22 -0700 Subject: [PATCH 2/8] GH ref --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 98331be285dc0..33767ee5a1eec 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -686,7 +686,7 @@ Numeric - Bug in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`). - Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`) - Bug in :class:`DataFrame` and :class:`Series` addition and subtraction between object-dtype objects and ``datetime64`` dtype objects (:issue:`33824`) -- Bug in :class:`DataFrame` flex arithmetic methods with ExtensionDtypes (:issue:`????`) +- Bug in :class:`DataFrame` flex arithmetic methods with ExtensionDtypes (:issue:`34277`) Conversion ^^^^^^^^^^ From 612fab6c788ca85c80cc085a0a0f919f79defd60 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 20 May 2020 14:32:40 -0700 Subject: [PATCH 3/8] REF: send broadcastable Series through DataFrame route --- pandas/core/internals/blocks.py | 11 -------- pandas/core/ops/__init__.py | 29 ++++++++++++++++++--- pandas/core/ops/array_ops.py | 3 +++ pandas/tests/arithmetic/test_timedelta64.py | 6 +---- pandas/tests/frame/test_arithmetic.py | 11 ++++---- 5 files changed, 35 insertions(+), 25 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4e18409e3eb3c..19f9730d33426 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -332,23 +332,12 @@ def apply(self, func, **kwargs) -> List["Block"]: apply the function to my values; return a block if we are not one """ - kwargs = self._ensure_same_shape_values(kwargs) with np.errstate(all="ignore"): result = func(self.values, **kwargs) return self._split_op_result(result) - def _ensure_same_shape_values(self, kwargs): - # TODO(EA2D): kludge for arithmetic not needed with 2D EA - if self.ndim == 2 and self.values.ndim == 1: - if "right" in kwargs and isinstance(kwargs["right"], np.ndarray): - right = kwargs["right"] - if right.ndim == 2: - assert right.shape == (1, len(self.values)), right.shape - kwargs["right"] = right[0] - return kwargs - def _split_op_result(self, result) -> List["Block"]: # See also: split_and_operate if is_extension_array_dtype(result) and result.ndim > 1: diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 585e6d0eb0811..07e35c5621dcb 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -57,7 +57,7 @@ ) if TYPE_CHECKING: - from pandas import DataFrame # noqa:F401 + from pandas import DataFrame, Series # noqa:F401 # ----------------------------------------------------------------------------- # constants @@ -619,6 +619,7 @@ def to_series(right): left, right = left.align( right, join="outer", axis=axis, level=level, copy=False ) + right = _align_series_as_frame(left, right, axis) return left, right @@ -679,6 +680,25 @@ def _frame_arith_method_with_reindex( return result.reindex(join_columns, axis=1) +def _align_series_as_frame(frame: "DataFrame", series: "Series", axis: int): + """ + If the Series operand is not EA-dtype, we can broadcast to 2D and operate + blockwise. + """ + rvalues = series._values + if not isinstance(rvalues, np.ndarray): + # TODO(EA2D): no need to special-case with 2D EAs + return series + + if axis == 0: + rvalues = rvalues.reshape(-1, 1) + else: + rvalues = rvalues.reshape(1, -1) + + rvalues = np.broadcast_to(rvalues, frame.shape) + return type(frame)(rvalues, index=frame.index, columns=frame.columns) + + def _arith_method_FRAME(cls, op, special): str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @@ -701,6 +721,10 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): ): return _frame_arith_method_with_reindex(self, other, op) + if isinstance(other, ABCSeries) and fill_value is not None: + # TODO: We could allow this in cases where we end up going + # through the DataFrame path + raise NotImplementedError(f"fill_value {fill_value} not supported.") self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): @@ -716,9 +740,6 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): pass_op = op if axis in [0, "columns", None] else na_op pass_op = pass_op if not is_logical else op - if fill_value is not None: - raise NotImplementedError(f"fill_value {fill_value} not supported.") - axis = self._get_axis_number(axis) if axis is not None else 1 new_data = _combine_series_frame( self, other, pass_op, axis=axis, str_rep=str_rep diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index eef42592d2b30..e48620ba7e263 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -160,6 +160,9 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal # In this case we do not fall back to the masked op, as that # will handle complex numbers incorrectly, see GH#32047 raise + if left.dtype.kind == "m": + # TODO: More systematic + raise result = masked_arith_op(left, right, op) if is_cmp and (is_scalar(result) or result is NotImplemented): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 904846c5fa099..9eef69168c47b 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1467,8 +1467,6 @@ def test_td64arr_add_sub_object_array(self, box_with_array): [pd.Timedelta(days=2), pd.Timedelta(days=4), pd.Timestamp("2000-01-07")] ) expected = tm.box_expected(expected, box_with_array) - if box_with_array is pd.DataFrame: - expected = expected.astype(object) tm.assert_equal(result, expected) msg = "unsupported operand type|cannot subtract a datelike" @@ -1483,8 +1481,6 @@ def test_td64arr_add_sub_object_array(self, box_with_array): [pd.Timedelta(0), pd.Timedelta(0), pd.Timestamp("2000-01-01")] ) expected = tm.box_expected(expected, box_with_array) - if box_with_array is pd.DataFrame: - expected = expected.astype(object) tm.assert_equal(result, expected) @@ -2009,7 +2005,7 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype) tm.assert_equal(result, expected) pattern = ( - "true_divide cannot use operands|" + "true_divide'? cannot use operands|" "cannot perform __div__|" "cannot perform __truediv__|" "unsupported operand|" diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 44869b6ace2d5..4e9348aee7608 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1424,12 +1424,13 @@ def test_alignment_non_pandas(self): range(1, 4), ]: - tm.assert_series_equal( - align(df, val, "index")[1], Series([1, 2, 3], index=df.index) - ) - tm.assert_series_equal( - align(df, val, "columns")[1], Series([1, 2, 3], index=df.columns) + expected = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index) + tm.assert_frame_equal(align(df, val, "index")[1], expected) + + expected = DataFrame( + {"X": [1, 1, 1], "Y": [2, 2, 2], "Z": [3, 3, 3]}, index=df.index ) + tm.assert_frame_equal(align(df, val, "columns")[1], expected) # length mismatch msg = "Unable to coerce to Series, length must be 3: given 2" From b8577214632e39c3e632f3471d9ca558e4d7f3a7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 20 May 2020 15:38:13 -0700 Subject: [PATCH 4/8] REF: re-use DataFrame dispatch code --- pandas/core/internals/blocks.py | 1 - pandas/core/ops/__init__.py | 14 +------------- pandas/core/ops/array_ops.py | 10 ++++++---- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 19f9730d33426..c052c6c9d7d1d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -332,7 +332,6 @@ def apply(self, func, **kwargs) -> List["Block"]: apply the function to my values; return a block if we are not one """ - with np.errstate(all="ignore"): result = func(self.values, **kwargs) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 07e35c5621dcb..61ba4d5416d29 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -511,19 +511,7 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str): # We assume that self.align(other, ...) has already been called rvalues = right._values - if isinstance(rvalues, np.ndarray): - # TODO(EA2D): no need to special-case with 2D EAs - # We can operate block-wise - if axis == 0: - rvalues = rvalues.reshape(-1, 1) - else: - rvalues = rvalues.reshape(1, -1) - - rvalues = np.broadcast_to(rvalues, left.shape) - - array_op = get_array_op(func, str_rep=str_rep) - bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"]) - return type(left)(bm) + assert not isinstance(rvalues, np.ndarray) # handled by align_series_as_frame if axis == 0: new_data = dispatch_to_series(left, right, func) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index e48620ba7e263..bf5a9ea384ec4 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -160,9 +160,6 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal # In this case we do not fall back to the masked op, as that # will handle complex numbers incorrectly, see GH#32047 raise - if left.dtype.kind == "m": - # TODO: More systematic - raise result = masked_arith_op(left, right, op) if is_cmp and (is_scalar(result) or result is NotImplemented): @@ -389,8 +386,13 @@ def get_array_op(op, str_rep: Optional[str] = None): Returns ------- - function + functools.partial """ + if isinstance(op, partial): + # We get here via dispatch_to_series in DataFrame case + # TODO: try to aovid getting here + return op + op_name = op.__name__.strip("_") if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: return partial(comparison_op, op=op, str_rep=str_rep) From ddb9fa3501c7ae3c29cf6822c5c1d4f92241ee19 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 20 May 2020 18:30:18 -0700 Subject: [PATCH 5/8] typo fixup --- pandas/core/ops/array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index bf5a9ea384ec4..c559893c307a2 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -390,7 +390,7 @@ def get_array_op(op, str_rep: Optional[str] = None): """ if isinstance(op, partial): # We get here via dispatch_to_series in DataFrame case - # TODO: try to aovid getting here + # TODO: try to avoid getting here return op op_name = op.__name__.strip("_") From 86eaed97ffcd571679169b3dcffd11ec97125a48 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 22 May 2020 10:43:39 -0700 Subject: [PATCH 6/8] port test from #34312 --- pandas/tests/frame/test_arithmetic.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 4e9348aee7608..b8ca5f16e4060 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1499,3 +1499,18 @@ def test_pow_nan_with_zero(): result = left["A"] ** right["A"] tm.assert_series_equal(result, expected["A"]) + + +def test_dataframe_series_extension_dtypes(): + # https://github.com/pandas-dev/pandas/issues/34311 + df = pd.DataFrame(np.random.randint(0, 100, (10, 3)), columns=["a", "b", "c"]) + ser = pd.Series([1, 2, 3], index=["a", "b", "c"]) + + expected = df.to_numpy("int64") + ser.to_numpy("int64").reshape(-1, 3) + expected = pd.DataFrame(expected, columns=df.columns, dtype="Int64") + + df_ea = df.astype("Int64") + result = df_ea + ser + tm.assert_frame_equal(result, expected) + result = df_ea + ser.astype("Int64") + tm.assert_frame_equal(result, expected) From a99028e4bd27f63e81d8a2c4ae509bd9836054a4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 22 May 2020 11:01:59 -0700 Subject: [PATCH 7/8] revert whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9704d17ace866..19db7dcb4b83e 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -687,7 +687,6 @@ Numeric - Bug in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`). - Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`) - Bug in :class:`DataFrame` and :class:`Series` addition and subtraction between object-dtype objects and ``datetime64`` dtype objects (:issue:`33824`) -- Bug in :class:`DataFrame` flex arithmetic methods with ExtensionDtypes (:issue:`34277`) Conversion ^^^^^^^^^^ From ee179900c91cd5f9c991cd613088d09d455c5a8a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 May 2020 07:54:14 -0700 Subject: [PATCH 8/8] rename --- pandas/core/ops/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 1931ee4df9cbe..b74de3ce0d696 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -563,7 +563,7 @@ def to_series(right): left, right = left.align( right, join="outer", axis=axis, level=level, copy=False ) - right = _align_series_as_frame(left, right, axis) + right = _maybe_align_series_as_frame(left, right, axis) return left, right @@ -624,7 +624,7 @@ def _frame_arith_method_with_reindex( return result.reindex(join_columns, axis=1) -def _align_series_as_frame(frame: "DataFrame", series: "Series", axis: int): +def _maybe_align_series_as_frame(frame: "DataFrame", series: "Series", axis: int): """ If the Series operand is not EA-dtype, we can broadcast to 2D and operate blockwise.