Make DataFrame arithmetic ops with 2D arrays behave like numpy analogues (pandas-dev#23000)

jbrockmendel · tm9k1 · commit 3a7b7c6e3422 · 2018-11-20T02:35:38.000+05:30
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -488,6 +488,39 @@ Previous Behavior:
     0 NaT
 
 
+.. _whatsnew_0240.api.dataframe_arithmetic_broadcasting:
+
+DataFrame Arithmetic Operations Broadcasting Changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+:class:`DataFrame` arithmetic operations when operating with 2-dimensional
+``np.ndarray`` objects now broadcast in the same way as ``np.ndarray``s
+broadcast.  (:issue:`23000`)
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+   In [3]: arr = np.arange(6).reshape(3, 2)
+   In [4]: df = pd.DataFrame(arr)
+   In [5]: df + arr[[0], :]   # 1 row, 2 columns
+   ...
+   ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2)
+   In [6]: df + arr[:, [1]]   # 1 column, 3 rows
+   ...
+   ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (3, 1)
+
+*Current Behavior*:
+
+.. ipython:: python
+   arr = np.arange(6).reshape(3, 2)
+   df = pd.DataFrame(arr)
+   df
+
+.. ipython:: python
+   df + arr[[0], :]   # 1 row, 2 columns
+   df + arr[:, [1]]   # 1 column, 3 rows
+
+
 .. _whatsnew_0240.api.extension:
 
 ExtensionType Changes
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -1799,14 +1799,32 @@ def to_series(right):
             right = to_series(right)
 
         elif right.ndim == 2:
-            if left.shape != right.shape:
+            if right.shape == left.shape:
+                right = left._constructor(right, index=left.index,
+                                          columns=left.columns)
+
+            elif right.shape[0] == left.shape[0] and right.shape[1] == 1:
+                # Broadcast across columns
+                try:
+                    right = np.broadcast_to(right, left.shape)
+                except AttributeError:
+                    # numpy < 1.10.0
+                    right = np.tile(right, (1, left.shape[1]))
+
+                right = left._constructor(right,
+                                          index=left.index,
+                                          columns=left.columns)
+
+            elif right.shape[1] == left.shape[1] and right.shape[0] == 1:
+                # Broadcast along rows
+                right = to_series(right[0, :])
+
+            else:
                 raise ValueError("Unable to coerce to DataFrame, shape "
                                  "must be {req_shape}: given {given_shape}"
                                  .format(req_shape=left.shape,
                                          given_shape=right.shape))
 
-            right = left._constructor(right, index=left.index,
-                                      columns=left.columns)
         elif right.ndim > 2:
             raise ValueError('Unable to coerce to Series/DataFrame, dim '
                              'must be <= 2: {dim}'.format(dim=right.shape))
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
@@ -99,6 +99,7 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname):
 # Arithmetic
 
 class TestFrameFlexArithmetic(object):
+
     def test_df_add_td64_columnwise(self):
         # GH#22534 Check that column-wise addition broadcasts correctly
         dti = pd.date_range('2016-01-01', periods=10)
@@ -252,6 +253,99 @@ def test_arith_flex_zero_len_raises(self):
 
 
 class TestFrameArithmetic(object):
+    def test_df_add_2d_array_rowlike_broadcasts(self):
+        # GH#23000
+        arr = np.arange(6).reshape(3, 2)
+        df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C'])
+
+        rowlike = arr[[1], :]  # shape --> (1, ncols)
+        assert rowlike.shape == (1, df.shape[1])
+
+        expected = pd.DataFrame([[2, 4],
+                                 [4, 6],
+                                 [6, 8]],
+                                columns=df.columns, index=df.index,
+                                # specify dtype explicitly to avoid failing
+                                # on 32bit builds
+                                dtype=arr.dtype)
+        result = df + rowlike
+        tm.assert_frame_equal(result, expected)
+        result = rowlike + df
+        tm.assert_frame_equal(result, expected)
+
+    def test_df_add_2d_array_collike_broadcasts(self):
+        # GH#23000
+        arr = np.arange(6).reshape(3, 2)
+        df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C'])
+
+        collike = arr[:, [1]]  # shape --> (nrows, 1)
+        assert collike.shape == (df.shape[0], 1)
+
+        expected = pd.DataFrame([[1, 2],
+                                 [5, 6],
+                                 [9, 10]],
+                                columns=df.columns, index=df.index,
+                                # specify dtype explicitly to avoid failing
+                                # on 32bit builds
+                                dtype=arr.dtype)
+        result = df + collike
+        tm.assert_frame_equal(result, expected)
+        result = collike + df
+        tm.assert_frame_equal(result, expected)
+
+    def test_df_arith_2d_array_rowlike_broadcasts(self,
+                                                  all_arithmetic_operators):
+        # GH#23000
+        opname = all_arithmetic_operators
+
+        arr = np.arange(6).reshape(3, 2)
+        df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C'])
+
+        rowlike = arr[[1], :]  # shape --> (1, ncols)
+        assert rowlike.shape == (1, df.shape[1])
+
+        exvals = [getattr(df.loc['A'], opname)(rowlike.squeeze()),
+                  getattr(df.loc['B'], opname)(rowlike.squeeze()),
+                  getattr(df.loc['C'], opname)(rowlike.squeeze())]
+
+        expected = pd.DataFrame(exvals, columns=df.columns, index=df.index)
+
+        if opname in ['__rmod__', '__rfloordiv__']:
+            # exvals will have dtypes [f8, i8, i8] so expected will be
+            #   all-f8, but the DataFrame operation will return mixed dtypes
+            # use exvals[-1].dtype instead of "i8" for compat with 32-bit
+            # systems/pythons
+            expected[False] = expected[False].astype(exvals[-1].dtype)
+
+        result = getattr(df, opname)(rowlike)
+        tm.assert_frame_equal(result, expected)
+
+    def test_df_arith_2d_array_collike_broadcasts(self,
+                                                  all_arithmetic_operators):
+        # GH#23000
+        opname = all_arithmetic_operators
+
+        arr = np.arange(6).reshape(3, 2)
+        df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C'])
+
+        collike = arr[:, [1]]  # shape --> (nrows, 1)
+        assert collike.shape == (df.shape[0], 1)
+
+        exvals = {True: getattr(df[True], opname)(collike.squeeze()),
+                  False: getattr(df[False], opname)(collike.squeeze())}
+
+        dtype = None
+        if opname in ['__rmod__', '__rfloordiv__']:
+            # Series ops may return mixed int/float dtypes in cases where
+            #   DataFrame op will return all-float.  So we upcast `expected`
+            dtype = np.common_type(*[x.values for x in exvals.values()])
+
+        expected = pd.DataFrame(exvals, columns=df.columns, index=df.index,
+                                dtype=dtype)
+
+        result = getattr(df, opname)(collike)
+        tm.assert_frame_equal(result, expected)
+
     def test_df_bool_mul_int(self):
         # GH#22047, GH#22163 multiplication by 1 should result in int dtype,
         # not object dtype