diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 91575c311b409..66f3d4bd1c4f3 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -488,6 +488,41 @@ Previous Behavior: 0 NaT +.. _whatsnew_0240.api.dataframe_arithmetic_broadcasting: + +DataFrame Arithmetic Operations Broadcasting Changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:class:`DataFrame` arithmetic operations when operating with 2-dimensional +``np.ndarray`` objects now broadcast in the same way as ``np.ndarray`` objects +broadcast. (:issue:`23000`) + +Previous Behavior: + +.. code-block:: ipython + + In [3]: arr = np.arange(6).reshape(3, 2) + In [4]: df = pd.DataFrame(arr) + In [5]: df + arr[[0], :] # 1 row, 2 columns + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) + In [6]: df + arr[:, [1]] # 1 column, 3 rows + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (3, 1) + +*Current Behavior*: + +.. ipython:: python + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr) + df + +.. ipython:: python + + df + arr[[0], :] # 1 row, 2 columns + df + arr[:, [1]] # 1 column, 3 rows + + .. 
_whatsnew_0240.api.extension: ExtensionType Changes diff --git a/pandas/core/ops.py b/pandas/core/ops.py index dc99faaf68f51..20559bca9caed 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1799,14 +1799,32 @@ def to_series(right): right = to_series(right) elif right.ndim == 2: - if left.shape != right.shape: + if right.shape == left.shape: + right = left._constructor(right, index=left.index, + columns=left.columns) + + elif right.shape[0] == left.shape[0] and right.shape[1] == 1: + # Broadcast across columns + try: + right = np.broadcast_to(right, left.shape) + except AttributeError: + # numpy < 1.10.0 + right = np.tile(right, (1, left.shape[1])) + + right = left._constructor(right, + index=left.index, + columns=left.columns) + + elif right.shape[1] == left.shape[1] and right.shape[0] == 1: + # Broadcast along rows + right = to_series(right[0, :]) + + else: raise ValueError("Unable to coerce to DataFrame, shape " "must be {req_shape}: given {given_shape}" .format(req_shape=left.shape, given_shape=right.shape)) - right = left._constructor(right, index=left.index, - columns=left.columns) elif right.ndim > 2: raise ValueError('Unable to coerce to Series/DataFrame, dim ' 'must be <= 2: {dim}'.format(dim=right.shape)) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 2eb11c3a2e2f7..b97c5e4f7d7c2 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -99,6 +99,7 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): # Arithmetic class TestFrameFlexArithmetic(object): + def test_df_add_td64_columnwise(self): # GH#22534 Check that column-wise addition broadcasts correctly dti = pd.date_range('2016-01-01', periods=10) @@ -252,6 +253,99 @@ def test_arith_flex_zero_len_raises(self): class TestFrameArithmetic(object): + def test_df_add_2d_array_rowlike_broadcasts(self): + # GH#23000 + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], 
index=['A', 'B', 'C']) + + rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + + expected = pd.DataFrame([[2, 4], + [4, 6], + [6, 8]], + columns=df.columns, index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype) + result = df + rowlike + tm.assert_frame_equal(result, expected) + result = rowlike + df + tm.assert_frame_equal(result, expected) + + def test_df_add_2d_array_collike_broadcasts(self): + # GH#23000 + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) + + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + + expected = pd.DataFrame([[1, 2], + [5, 6], + [9, 10]], + columns=df.columns, index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype) + result = df + collike + tm.assert_frame_equal(result, expected) + result = collike + df + tm.assert_frame_equal(result, expected) + + def test_df_arith_2d_array_rowlike_broadcasts(self, + all_arithmetic_operators): + # GH#23000 + opname = all_arithmetic_operators + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) + + rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + + exvals = [getattr(df.loc['A'], opname)(rowlike.squeeze()), + getattr(df.loc['B'], opname)(rowlike.squeeze()), + getattr(df.loc['C'], opname)(rowlike.squeeze())] + + expected = pd.DataFrame(exvals, columns=df.columns, index=df.index) + + if opname in ['__rmod__', '__rfloordiv__']: + # exvals will have dtypes [f8, i8, i8] so expected will be + # all-f8, but the DataFrame operation will return mixed dtypes + # use exvals[-1].dtype instead of "i8" for compat with 32-bit + # systems/pythons + expected[False] = expected[False].astype(exvals[-1].dtype) + + result = getattr(df, opname)(rowlike) + tm.assert_frame_equal(result, expected) + + def 
test_df_arith_2d_array_collike_broadcasts(self, + all_arithmetic_operators): + # GH#23000 + opname = all_arithmetic_operators + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) + + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + + exvals = {True: getattr(df[True], opname)(collike.squeeze()), + False: getattr(df[False], opname)(collike.squeeze())} + + dtype = None + if opname in ['__rmod__', '__rfloordiv__']: + # Series ops may return mixed int/float dtypes in cases where + # DataFrame op will return all-float. So we upcast `expected` + dtype = np.common_type(*[x.values for x in exvals.values()]) + + expected = pd.DataFrame(exvals, columns=df.columns, index=df.index, + dtype=dtype) + + result = getattr(df, opname)(collike) + tm.assert_frame_equal(result, expected) + def test_df_bool_mul_int(self): # GH#22047, GH#22163 multiplication by 1 should result in int dtype, # not object dtype