From 1697252a73785bb4ad1bfff82304d5c37534897f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 17 Mar 2020 09:59:21 -0700 Subject: [PATCH 01/32] PERF: block-wise arithmetic for frame-with-frame --- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/ops/__init__.py | 72 +++++++++++++++++++++- pandas/core/ops/array_ops.py | 2 +- pandas/tests/arithmetic/common.py | 9 ++- pandas/tests/arithmetic/test_datetime64.py | 7 ++- 5 files changed, 83 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 105d9581b1a25..46f2c239b4193 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1299,7 +1299,7 @@ def _addsub_object_array(self, other: np.ndarray, op): result : same class as self """ assert op in [operator.add, operator.sub] - if len(other) == 1: + if len(other) == 1 and self.ndim == other.ndim == 1: return op(self, other[0]) warnings.warn( diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 3153a9ac28c10..f80ad80e15d7b 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -5,7 +5,7 @@ """ import datetime import operator -from typing import TYPE_CHECKING, Optional, Set, Tuple +from typing import TYPE_CHECKING, List, Optional, Set, Tuple import numpy as np @@ -58,6 +58,7 @@ if TYPE_CHECKING: from pandas import DataFrame # noqa:F401 + from pandas.core.internals.blocks import Block # noqa: F401 # ----------------------------------------------------------------------------- # constants @@ -353,6 +354,70 @@ def fill_binop(left, right, fill_value): # Dispatch logic +def operate_blockwise(left, right, array_op): + assert right._indexed_same(left) + + res_blks: List["Block"] = [] + rmgr = right._data + for n, blk in enumerate(left._data.blocks): + locs = blk.mgr_locs + + blk_vals = blk.values + + if not isinstance(blk_vals, np.ndarray): + # 1D EA + assert len(locs) == 1, locs + rser = right.iloc[:, locs[0]] + rvals = extract_array(rser, extract_numpy=True) + res_values = array_op(blk_vals, rvals) + nbs = blk._split_op_result(res_values) + res_blks.extend(nbs) + continue + + rblks = rmgr._slice_take_blocks_ax0(locs.indexer) + + for k, rblk in enumerate(rblks): + lvals = blk_vals[rblk.mgr_locs.indexer, :] + rvals = rblk.values + + if not isinstance(rvals, np.ndarray): + # 1D EA + assert lvals.shape[0] == 1, lvals.shape + lvals = lvals[0, :] + res_values = array_op(lvals, rvals) + nbs = rblk._split_op_result(res_values) + assert len(nbs) == 1 + nb = nbs[0] + nb.mgr_locs = locs.as_array[nb.mgr_locs] + res_blks.append(nb) + continue + + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + + res_values = array_op(lvals, rvals) + assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) + nbs = rblk._split_op_result(res_values) + for nb in nbs: + # TODO: maybe optimize by sticking with slices? 
+ nb_mgr_locs = nb.mgr_locs + nblocs = locs.as_array[nb_mgr_locs.indexer] + nb.mgr_locs = nblocs + assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) + assert all(x in locs.as_array for x in nb.mgr_locs.as_array) + + res_blks.extend(nbs) + + slocs = set(y for nb in res_blks for y in nb.mgr_locs.as_array) + nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks) + assert nlocs == len(left.columns), (nlocs, len(left.columns)) + assert len(slocs) == nlocs, (len(slocs), nlocs) + assert slocs == set(range(nlocs)), slocs + + # TODO: once this is working, pass do_integrity_check=False + new_mgr = type(rmgr)(res_blks, axes=rmgr.axes) + return new_mgr + + def dispatch_to_series(left, right, func, str_rep=None, axis=None): """ Evaluate the frame operation func(left, right) by evaluating @@ -385,8 +450,9 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): elif isinstance(right, ABCDataFrame): assert right._indexed_same(left) - def column_op(a, b): - return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))} + array_op = get_array_op(func, str_rep=str_rep) + bm = operate_blockwise(left, right, array_op) + return type(left)(bm) elif isinstance(right, ABCSeries) and axis == "columns": # We only get here if called via _combine_series_frame, diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index e285c53d9813e..dcef6d8f3c981 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -132,7 +132,7 @@ def masked_arith_op(x: np.ndarray, y, op): return result -def define_na_arithmetic_op(op, str_rep: str): +def define_na_arithmetic_op(op, str_rep: Optional[str]): def na_op(x, y): return na_arithmetic_op(x, y, op, str_rep) diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py index ccc49adc5da82..755fbd0d9036c 100644 --- a/pandas/tests/arithmetic/common.py +++ b/pandas/tests/arithmetic/common.py @@ -70,7 +70,14 @@ def assert_invalid_comparison(left, right, box): result = right != left tm.assert_equal(result, ~expected) - msg = "Invalid comparison between|Cannot compare type|not supported between" + msg = "|".join( + [ + "Invalid comparison between", + "Cannot compare type", + "not supported between", + "invalid type promotion", + ] + ) with pytest.raises(TypeError, match=msg): left < right with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index f7211ab5f9fd4..5cadf8bff51f1 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -964,7 +964,9 @@ def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): obj = tm.box_expected(dti, box_with_array) expected = tm.box_expected(expected, box_with_array) - warn = PerformanceWarning if box_with_array is not pd.DataFrame else None + warn = None + if box_with_array is not pd.DataFrame or tz_naive_fixture is None: + warn = PerformanceWarning with tm.assert_produces_warning(warn): result = obj - obj.astype(object) tm.assert_equal(result, expected) @@ -1388,8 +1390,7 @@ def test_dt64arr_add_mixed_offset_array(self, box_with_array): s = DatetimeIndex([Timestamp("2000-1-1"), Timestamp("2000-2-1")]) s = tm.box_expected(s, box_with_array) - warn = None if box_with_array is pd.DataFrame else PerformanceWarning - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): other = pd.Index([pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()]) other = 
tm.box_expected(other, box_with_array) result = s + other From 30a836d6ce7a7ef0c9f01daca99cb182bebbbcfa Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 17 Mar 2020 10:46:22 -0700 Subject: [PATCH 02/32] lint fixup --- pandas/core/ops/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index f80ad80e15d7b..e127be3b7d644 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -407,7 +407,7 @@ def operate_blockwise(left, right, array_op): res_blks.extend(nbs) - slocs = set(y for nb in res_blks for y in nb.mgr_locs.as_array) + slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array} nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks) assert nlocs == len(left.columns), (nlocs, len(left.columns)) assert len(slocs) == nlocs, (len(slocs), nlocs) From 4334353b53ead13ffb777cb869a17e18687be4c2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 17 Mar 2020 19:14:02 -0700 Subject: [PATCH 03/32] troubleshoot npdev build --- pandas/core/ops/array_ops.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index dcef6d8f3c981..4338746f05386 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -5,6 +5,7 @@ from functools import partial import operator from typing import Any, Optional +import warnings import numpy as np @@ -163,15 +164,18 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal """ import pandas.core.computation.expressions as expressions - try: - result = expressions.evaluate(op, str_rep, left, right) - except TypeError: - if is_cmp: - # numexpr failed on comparison op, e.g. ndarray[float] > datetime - # In this case we do not fall back to the masked op, as that - # will handle complex numbers incorrectly, see GH#32047 - raise - result = masked_arith_op(left, right, op) + with warnings.catch_warnings(): + # suppress warnings from numpy about element-wise comparison + warnings.simplefilter("ignore", DeprecationWarning) + try: + result = expressions.evaluate(op, str_rep, left, right) + except TypeError: + if is_cmp: + # numexpr failed on comparison op, e.g. 
ndarray[float] > datetime + # In this case we do not fall back to the masked op, as that + # will handle complex numbers incorrectly, see GH#32047 + raise + result = masked_arith_op(left, right, op) if is_cmp and (is_scalar(result) or result is NotImplemented): # numpy returned a scalar instead of operating element-wise From 713a776e551186e9e2e4d480f362072b60042f65 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Mar 2020 19:25:57 -0700 Subject: [PATCH 04/32] comment --- pandas/core/arrays/datetimelike.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 6ca45f9acec91..2fdb33b7b8a8f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1297,10 +1297,11 @@ def _addsub_object_array(self, other: np.ndarray, op): """ assert op in [operator.add, operator.sub] if len(other) == 1 and self.ndim == other.ndim == 1: + # If both 1D then broadcasting is unambiguous return op(self, other[0]) warnings.warn( - "Adding/subtracting array of DateOffsets to " + "Adding/subtracting object-dtype array to " f"{type(self).__name__} not vectorized", PerformanceWarning, ) @@ -1308,7 +1309,7 @@ def _addsub_object_array(self, other: np.ndarray, op): # Caller is responsible for broadcasting if necessary assert self.shape == other.shape, (self.shape, other.shape) - res_values = op(self.astype("O"), np.array(other)) + res_values = op(self.astype("O"), np.asarray(other)) result = array(res_values.ravel()) result = extract_array(result, extract_numpy=True).reshape(self.shape) return result From 95ef3adddd6554ff1aa21ba3304cc9b175f6c49b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Mar 2020 20:45:20 -0700 Subject: [PATCH 05/32] checkpoint passing --- pandas/core/ops/__init__.py | 51 +++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index caf420f06a7fb..4fdd18418f423 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -304,43 +304,62 @@ def operate_blockwise(left, right, array_op): blk_vals = blk.values + left_ea = not isinstance(blk_vals, np.ndarray) + if not isinstance(blk_vals, np.ndarray): # 1D EA assert len(locs) == 1, locs - rser = right.iloc[:, locs[0]] - rvals = extract_array(rser, extract_numpy=True) + rblks = rmgr._slice_take_blocks_ax0(locs.indexer) + assert len(rblks) == 1, rblks + rblk = rblks[0] + assert rblk.shape[0] == 1, rblk.shape + + rvals = rblk.values + right_ea = not isinstance(rvals, np.ndarray) + if not right_ea: + assert rvals.shape[0] == 1, rvals.shape + rvals = rvals[0, :] + #rser = right.iloc[:, locs[0]] + #rvals = extract_array(rser, extract_numpy=True) res_values = array_op(blk_vals, rvals) nbs = blk._split_op_result(res_values) + # Setting nb.mgr_locs is unnecessary here, but harmless res_blks.extend(nbs) continue rblks = rmgr._slice_take_blocks_ax0(locs.indexer) for k, rblk in enumerate(rblks): - lvals = blk_vals[rblk.mgr_locs.indexer, :] rvals = rblk.values + right_ea = not isinstance(rvals, np.ndarray) - if not isinstance(rvals, np.ndarray): + lvals = blk_vals[rblk.mgr_locs.indexer, :] + + if not (left_ea or right_ea): + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + #elif left_ea and right_ea: + # assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif right_ea: # 1D EA assert lvals.shape[0] == 1, lvals.shape lvals = lvals[0, :] - res_values = array_op(lvals, rvals) - nbs 
= rblk._split_op_result(res_values) - assert len(nbs) == 1 - nb = nbs[0] - nb.mgr_locs = locs.as_array[nb.mgr_locs] - res_blks.append(nb) - continue - - assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + else: + assert False # should be unreachable ATM + #assert rvals.shape[0] == 1, rvals.shape + #rvals = rvals[0, :] res_values = array_op(lvals, rvals) - assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) nbs = rblk._split_op_result(res_values) + + # Debugging assertions + if right_ea: # or left_ea + assert len(nbs) == 1 + else: + assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) + for nb in nbs: # TODO: maybe optimize by sticking with slices? - nb_mgr_locs = nb.mgr_locs - nblocs = locs.as_array[nb_mgr_locs.indexer] + nblocs = locs.as_array[nb.mgr_locs.indexer] nb.mgr_locs = nblocs assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) assert all(x in locs.as_array for x in nb.mgr_locs.as_array) From 61e5cd6260d3a94f195fa5466a6c9cc0a6bd7607 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 25 Mar 2020 09:29:34 -0700 Subject: [PATCH 06/32] checkpoint passing --- pandas/core/ops/__init__.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 4fdd18418f423..687d1b8bb7891 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -306,7 +306,7 @@ def operate_blockwise(left, right, array_op): left_ea = not isinstance(blk_vals, np.ndarray) - if not isinstance(blk_vals, np.ndarray): + if False:#left_ea: # 1D EA assert len(locs) == 1, locs rblks = rmgr._slice_take_blocks_ax0(locs.indexer) @@ -319,8 +319,6 @@ def operate_blockwise(left, right, array_op): if not right_ea: assert rvals.shape[0] == 1, rvals.shape rvals = rvals[0, :] - #rser = right.iloc[:, locs[0]] - #rvals = extract_array(rser, extract_numpy=True) res_values = array_op(blk_vals, rvals) nbs = blk._split_op_result(res_values) # Setting nb.mgr_locs is unnecessary here, but harmless @@ -329,30 +327,38 @@ def operate_blockwise(left, right, array_op): rblks = rmgr._slice_take_blocks_ax0(locs.indexer) + if left_ea: + assert len(locs) == 1, locs + assert len(rblks) == 1, rblks + assert rblks[0].shape[0] == 1, rblks[0].shape + for k, rblk in enumerate(rblks): rvals = rblk.values right_ea = not isinstance(rvals, np.ndarray) - lvals = blk_vals[rblk.mgr_locs.indexer, :] + #lvals = blk_vals[rblk.mgr_locs.indexer, :] if not (left_ea or right_ea): + lvals = blk_vals[rblk.mgr_locs.indexer, :] + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif left_ea and right_ea: + lvals = blk_vals assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) - #elif left_ea and right_ea: - # assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) elif right_ea: # 1D EA + lvals = blk_vals[rblk.mgr_locs.indexer, :] assert lvals.shape[0] == 1, lvals.shape lvals = lvals[0, :] else: - assert False # should be unreachable ATM - #assert rvals.shape[0] == 1, rvals.shape - #rvals = rvals[0, :] + lvals = blk_vals + assert rvals.shape[0] == 1, rvals.shape + rvals = rvals[0, :] res_values = array_op(lvals, rvals) nbs = rblk._split_op_result(res_values) # Debugging assertions - if right_ea: # or left_ea + if right_ea or left_ea: assert len(nbs) == 1 else: assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) From 89c3d7bdef4a1b89f14161ac4db3a87611aa3cec Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 25 Mar 2020 10:03:31 -0700 Subject: 
[PATCH 07/32] refactor --- pandas/core/ops/__init__.py | 75 ++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 43 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 687d1b8bb7891..b285c5494ee71 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -4,7 +4,7 @@ This is not a public API. """ import operator -from typing import TYPE_CHECKING, List, Optional, Set +from typing import TYPE_CHECKING, List, Optional, Set, Tuple import numpy as np @@ -297,34 +297,41 @@ def fill_binop(left, right, fill_value): def operate_blockwise(left, right, array_op): assert right._indexed_same(left) + def get_same_shape_values( + lblk: "Block", rblk: "Block", left_ea: bool, right_ea: bool + ) -> Tuple[ArrayLike, ArrayLike]: + """ + Slice lblk.values to align with rblk. Squeeze if we have EAs. + """ + lvals = lblk.values + rvals = rblk.values + + # TODO(EA2D): with 2D EAs pnly this first clause would be needed + if not (left_ea or right_ea): + lvals = lvals[rblk.mgr_locs.indexer, :] + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif left_ea and right_ea: + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif right_ea: + # lvals are 2D, rvals are 1D + lvals = lvals[rblk.mgr_locs.indexer, :] + assert lvals.shape[0] == 1, lvals.shape + lvals = lvals[0, :] + else: + # lvals are 1D, rvals are 2D + assert rvals.shape[0] == 1, rvals.shape + rvals = rvals[0, :] + + return lvals, rvals + res_blks: List["Block"] = [] rmgr = right._data for n, blk in enumerate(left._data.blocks): locs = blk.mgr_locs - blk_vals = blk.values left_ea = not isinstance(blk_vals, np.ndarray) - if False:#left_ea: - # 1D EA - assert len(locs) == 1, locs - rblks = rmgr._slice_take_blocks_ax0(locs.indexer) - assert len(rblks) == 1, rblks - rblk = rblks[0] - assert rblk.shape[0] == 1, rblk.shape - - rvals = rblk.values - right_ea = not isinstance(rvals, np.ndarray) - if not right_ea: - assert rvals.shape[0] == 1, rvals.shape - rvals = rvals[0, :] - res_values = array_op(blk_vals, rvals) - nbs = blk._split_op_result(res_values) - # Setting nb.mgr_locs is unnecessary here, but harmless - res_blks.extend(nbs) - continue - rblks = rmgr._slice_take_blocks_ax0(locs.indexer) if left_ea: @@ -333,38 +340,20 @@ def operate_blockwise(left, right, array_op): assert rblks[0].shape[0] == 1, rblks[0].shape for k, rblk in enumerate(rblks): - rvals = rblk.values - right_ea = not isinstance(rvals, np.ndarray) - - #lvals = blk_vals[rblk.mgr_locs.indexer, :] - - if not (left_ea or right_ea): - lvals = blk_vals[rblk.mgr_locs.indexer, :] - assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) - elif left_ea and right_ea: - lvals = blk_vals - assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) - elif right_ea: - # 1D EA - lvals = blk_vals[rblk.mgr_locs.indexer, :] - assert lvals.shape[0] == 1, lvals.shape - lvals = lvals[0, :] - else: - lvals = blk_vals - assert rvals.shape[0] == 1, rvals.shape - rvals = rvals[0, :] + right_ea = not isinstance(rblk.values, np.ndarray) + + lvals, rvals = get_same_shape_values(blk, rblk, left_ea, right_ea) res_values = array_op(lvals, rvals) nbs = rblk._split_op_result(res_values) - # Debugging assertions if right_ea or left_ea: assert len(nbs) == 1 else: assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) for nb in nbs: - # TODO: maybe optimize by sticking with slices? 
+ # Reset mgr_locs to correspond to our original DataFrame nblocs = locs.as_array[nb.mgr_locs.indexer] nb.mgr_locs = nblocs assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) From e348e464457556b1678c898c9cdc7366cd0b3877 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 25 Mar 2020 10:11:25 -0700 Subject: [PATCH 08/32] blackify --- pandas/tests/frame/test_arithmetic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 18cd2a4b0c90b..0b73583002d5c 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1,7 +1,6 @@ from collections import deque from datetime import datetime import operator -import re import numpy as np import pytest @@ -47,10 +46,11 @@ def check(df, df2): ) tm.assert_frame_equal(result, expected) - msg = "|".join([ + msgs = [ r"Invalid comparison between dtype=datetime64\[ns\] and ndarray", "invalid type promotion", - ]) + ] + msg = "|".join(msgs) with pytest.raises(TypeError, match=msg): x >= y with pytest.raises(TypeError, match=msg): From 2b1ba182144759b68e1a7b4eb8f3bfca4f2a05fb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 31 Mar 2020 14:44:03 -0700 Subject: [PATCH 09/32] disable assertions for perf --- pandas/core/ops/__init__.py | 19 ++++++++++--------- pandas/tests/frame/test_arithmetic.py | 1 + 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index b285c5494ee71..3911380c7f795 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -356,19 +356,20 @@ def get_same_shape_values( # Reset mgr_locs to correspond to our original DataFrame nblocs = locs.as_array[nb.mgr_locs.indexer] nb.mgr_locs = nblocs - assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) - assert all(x in locs.as_array for x in nb.mgr_locs.as_array) + # Assertions are disabled for performance, but should hold: + # assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) + # assert all(x in locs.as_array for x in nb.mgr_locs.as_array) res_blks.extend(nbs) - slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array} - nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks) - assert nlocs == len(left.columns), (nlocs, len(left.columns)) - assert len(slocs) == nlocs, (len(slocs), nlocs) - assert slocs == set(range(nlocs)), slocs + # Assertions are disabled for performance, but should hold: + # slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array} + # nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks) + # assert nlocs == len(left.columns), (nlocs, len(left.columns)) + # assert len(slocs) == nlocs, (len(slocs), nlocs) + # assert slocs == set(range(nlocs)), slocs - # TODO: once this is working, pass do_integrity_check=False - new_mgr = type(rmgr)(res_blks, axes=rmgr.axes) + new_mgr = type(rmgr)(res_blks, axes=rmgr.axes, do_integrity_check=False) return new_mgr diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 01ea3fc8676de..a5d696be24d0a 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1,6 +1,7 @@ from collections import deque from datetime import datetime import operator +import re import numpy as np import pytest From 91c86a35072e87ec5fa16fb41a89f4c8c7cb65df Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 31 Mar 2020 21:05:01 -0700 Subject: [PATCH 10/32] asv --- asv_bench/benchmarks/arithmetic.py | 48 
++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 5a8b109c21858..b53ef9505990d 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -80,6 +80,54 @@ def time_frame_op_with_series_axis0(self, opname): getattr(self.df, opname)(self.ser, axis=0) +class FrameWithFrameWide: + # Many-columns, mixed dtypes + + params = [ + [ + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.pow, + operator.mod, + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.lt, + operator.le, + ] + ] + param_names = ["op"] + + def setup(self, op): + # we choose dtypes so as to make the blocks + # a) not perfectly match between right and left + # b) appreciably bigger than single columns + arr = np.random.randn(10 ** 6).reshape(500, 2000).astype(np.float64) + df = pd.DataFrame(arr) + df[1000] = df[1000].astype(np.float32) + df.iloc[:, 1000:] = df.iloc[:, 1000:].astype(np.float32) + + # TODO: GH#33198 the setting here shoudlnt need two steps + df2 = pd.DataFrame(arr) + df2[1000] = df2[1000].astype(np.int64) + df2.iloc[:, 500:1500] = df2.iloc[:, 500:1500].astype(np.int64) + + self.left = df + self.right = df + + def time_op_different_blocks(self, op): + # blocks (and dtypes) are not aligned + op(self.left, self.right) + + def time_op_same_blocks(self, op): + # blocks (and dtypes) are aligned + op(self.left, self.left) + + class Ops: params = [[True, False], ["default", 1]] From 2034084db2a999691bcf28f424fc520dadcdeead Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 1 Apr 2020 08:03:28 -0700 Subject: [PATCH 11/32] whatsnew --- doc/source/whatsnew/v1.1.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 25f847c698278..d424315f5e416 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -274,7 +274,8 @@ Performance improvements :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`, :issue:`32825`, :issue:`32826`, :issue:`32856`, :issue:`32858`). - Performance improvement in :meth:`Series.sum` for nullable (integer and boolean) dtypes (:issue:`30982`). - +- Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`) +- .. --------------------------------------------------------------------------- From 0c12d35e5cb4737fae982a8df1aa90fc9332c1d6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 3 Apr 2020 10:14:00 -0700 Subject: [PATCH 12/32] revert warning suppression --- pandas/core/ops/array_ops.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index f419c548b27c1..c17955457245b 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -6,7 +6,6 @@ from functools import partial import operator from typing import Any, Optional, Tuple -import warnings import numpy as np @@ -159,17 +158,14 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal """ import pandas.core.computation.expressions as expressions - with warnings.catch_warnings(): - # suppress warnings from numpy about element-wise comparison - warnings.simplefilter("ignore", DeprecationWarning) - try: - result = expressions.evaluate(op, str_rep, left, right) - except TypeError: - if is_cmp: - # numexpr failed on comparison op, e.g. 
ndarray[float] > datetime - # In this case we do not fall back to the masked op, as that - # will handle complex numbers incorrectly, see GH#32047 - raise + try: + result = expressions.evaluate(op, str_rep, left, right) + except TypeError: + if is_cmp: + # numexpr failed on comparison op, e.g. ndarray[float] > datetime + # In this case we do not fall back to the masked op, as that + # will handle complex numbers incorrectly, see GH#32047 + raise result = masked_arith_op(left, right, op) if is_cmp and (is_scalar(result) or result is NotImplemented): From 9727562801ac6a1a98bd4ee348fe3666ec49e801 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 3 Apr 2020 10:51:49 -0700 Subject: [PATCH 13/32] Fixupm indentation --- pandas/core/ops/array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index c17955457245b..05ec48c206b3c 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -166,7 +166,7 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal # In this case we do not fall back to the masked op, as that # will handle complex numbers incorrectly, see GH#32047 raise - result = masked_arith_op(left, right, op) + result = masked_arith_op(left, right, op) if is_cmp and (is_scalar(result) or result is NotImplemented): # numpy returned a scalar instead of operating element-wise From 42bbbf3c7ee0ea3d5352191c380972842bf0edd4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 4 Apr 2020 15:42:26 -0700 Subject: [PATCH 14/32] suppress warning --- pandas/core/ops/array_ops.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 05ec48c206b3c..efd0500d351f4 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -6,6 +6,7 @@ from functools import partial import operator from typing import Any, Optional, Tuple +import warnings import numpy as np @@ -308,8 +309,13 @@ def comparison_op( res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) else: - with np.errstate(all="ignore"): - res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep, is_cmp=True) + with warnings.catch_warnings(): + # suppress warnings from numpy about element-wise comparison + warnings.simplefilter("ignore", DeprecationWarning) + with np.errstate(all="ignore"): + res_values = na_arithmetic_op( + lvalues, rvalues, op, str_rep, is_cmp=True + ) return res_values From 0d958a3b6022cec977fc9b5f6fc802da62d2f284 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 10:34:43 -0700 Subject: [PATCH 15/32] update asv --- asv_bench/benchmarks/arithmetic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 0473743dabf46..c5ad533ed82b1 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -127,11 +127,13 @@ def setup(self, op): df = pd.DataFrame(arr) df[1000] = df[1000].astype(np.float32) df.iloc[:, 1000:] = df.iloc[:, 1000:].astype(np.float32) + df._consolidate_inplace() # TODO: GH#33198 the setting here shoudlnt need two steps df2 = pd.DataFrame(arr) df2[1000] = df2[1000].astype(np.int64) df2.iloc[:, 500:1500] = df2.iloc[:, 500:1500].astype(np.int64) + df2._consolidate_inplace() self.left = df self.right = df From 56eef516d9fc83d9dfa82c41b61c0d86d7aaf202 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 11:09:41 -0700 Subject: [PATCH 16/32] 
_data->_mgr --- pandas/core/ops/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index bd12fe122bbf0..4d7f4fb026ae8 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -325,8 +325,8 @@ def get_same_shape_values( return lvals, rvals res_blks: List["Block"] = [] - rmgr = right._data - for n, blk in enumerate(left._data.blocks): + rmgr = right._mgr + for n, blk in enumerate(left._mgr.blocks): locs = blk.mgr_locs blk_vals = blk.values From ae744b74a082b7305c81b53a02f6ab3762afa2fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 9 Apr 2020 14:33:38 -0700 Subject: [PATCH 17/32] update to use faspath constructor --- pandas/core/frame.py | 4 +++- pandas/core/ops/__init__.py | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c3018861bce57..e7732265beea1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -442,6 +442,7 @@ def __init__( mgr = self._init_mgr( data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy ) + elif isinstance(data, dict): mgr = init_dict(data, index, columns, dtype=dtype) elif isinstance(data, ma.MaskedArray): @@ -5453,10 +5454,11 @@ def _construct_result(self, result) -> "DataFrame": ------- DataFrame """ - out = self._constructor(result, index=self.index, copy=False) + out = self._constructor(result, copy=False) # Pin columns instead of passing to constructor for compat with # non-unique columns case out.columns = self.columns + out.index = self.index return out def combine( diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 4d7f4fb026ae8..8bd6e4244e818 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -332,12 +332,14 @@ def get_same_shape_values( left_ea = not isinstance(blk_vals, np.ndarray) + # TODO: joris says this is costly, see if we can optimize rblks = rmgr._slice_take_blocks_ax0(locs.indexer) - if left_ea: - assert len(locs) == 1, locs - assert len(rblks) == 1, rblks - assert rblks[0].shape[0] == 1, rblks[0].shape + # Assertions are disabled for performance, but should hold: + # if left_ea: + # assert len(locs) == 1, locs + # assert len(rblks) == 1, rblks + # assert rblks[0].shape[0] == 1, rblks[0].shape for k, rblk in enumerate(rblks): right_ea = not isinstance(rblk.values, np.ndarray) @@ -347,10 +349,11 @@ def get_same_shape_values( res_values = array_op(lvals, rvals) nbs = rblk._split_op_result(res_values) - if right_ea or left_ea: - assert len(nbs) == 1 - else: - assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) + # Assertions are disabled for performance, but should hold: + # if right_ea or left_ea: + # assert len(nbs) == 1 + # else: + # assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) for nb in nbs: # Reset mgr_locs to correspond to our original DataFrame From f42c40334812f2765f19565d03e82e9263ce1039 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Apr 2020 11:00:55 -0700 Subject: [PATCH 18/32] update import --- pandas/core/ops/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index fede06a8970b9..5f6bc230917a4 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -10,7 +10,7 @@ from pandas._libs import lib from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401 -from pandas._typing 
import Level +from pandas._typing import ArrayLike, Level from pandas.util._decorators import Appender from pandas.core.dtypes.common import is_list_like From 8a2807efdc0e3bd14f44a8dc5efbd54226346647 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Apr 2020 11:01:13 -0700 Subject: [PATCH 19/32] remove unused import --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 75c935cdf2e60..80573f32b936e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -8,7 +8,7 @@ from pandas._libs import NaT, algos as libalgos, lib, writers import pandas._libs.internals as libinternals -from pandas._libs.tslibs import Timedelta, conversion +from pandas._libs.tslibs import conversion from pandas._libs.tslibs.timezones import tz_compare from pandas._typing import ArrayLike from pandas.util._validators import validate_bool_kwarg From fd10fb6cf39619e9a0e66affa5865f6f7755e22f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Apr 2020 14:11:26 -0700 Subject: [PATCH 20/32] rebase compat --- pandas/core/ops/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 5f6bc230917a4..6b703dd7f40aa 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -347,6 +347,8 @@ def get_same_shape_values( lvals, rvals = get_same_shape_values(blk, rblk, left_ea, right_ea) res_values = array_op(lvals, rvals) + if left_ea and not right_ea and hasattr(res_values, "reshape"): + res_values = res_values.reshape(1, -1) nbs = rblk._split_op_result(res_values) # Assertions are disabled for performance, but should hold: From 7150e87363cc205f0df380c06812de1ca264d8c3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 11 Apr 2020 18:09:00 -0700 Subject: [PATCH 21/32] slice instead of take --- pandas/core/internals/managers.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d1293974b776a..175f0b5332f12 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1322,6 +1322,7 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): if allow_fill and fill_value is None: _, fill_value = maybe_promote(blk.dtype) + # TODO: Any cases where we can optimize this to slice? return [ blk.take_nd( slobj, @@ -1369,11 +1370,18 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): blocks.append(newblk) else: - blocks.append( - blk.take_nd( - blklocs[mgr_locs.indexer], axis=0, new_mgr_locs=mgr_locs, - ) - ) + taker = blklocs[mgr_locs.indexer] + # TODO: taker.max() probably isnt the Technically Correct + # way of calling this? + taker = lib.maybe_indices_to_slice(taker, taker.max()) + + if isinstance(taker, slice): + nb = blk.getitem_block(taker) + nb.mgr_locs = mgr_locs + else: + # TODO: just use getitem_block anyway? 
+ nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs) + blocks.append(nb) return blocks From 0ca2125d85f1b68ad4bc03fc462278b5bb387700 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Apr 2020 07:06:21 -0700 Subject: [PATCH 22/32] Dummy commit to force CI From 2bfc30885b6e3a0bcf1d60ab8b0e731b90c1579e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 15 Apr 2020 07:22:30 -0700 Subject: [PATCH 23/32] update call bound --- pandas/core/internals/managers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index bdeb7820d2762..12f7fd3decb43 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1371,9 +1371,9 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): else: taker = blklocs[mgr_locs.indexer] - # TODO: taker.max() probably isnt the Technically Correct + # TODO: taker.max()+1 probably isnt the Technically Correct # way of calling this? - taker = lib.maybe_indices_to_slice(taker, taker.max()) + taker = lib.maybe_indices_to_slice(taker, taker.max() + 1) if isinstance(taker, slice): nb = blk.getitem_block(taker) From d5ad2a079c1e5ddd097f9bdff32c1ecf8dce890e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 15 Apr 2020 11:49:57 -0700 Subject: [PATCH 24/32] update max_len --- pandas/core/internals/managers.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 12f7fd3decb43..089a12ff40bd9 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1322,7 +1322,6 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): if allow_fill and fill_value is None: _, fill_value = maybe_promote(blk.dtype) - # TODO: Any cases where we can optimize this to slice? return [ blk.take_nd( slobj, @@ -1371,13 +1370,11 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): else: taker = blklocs[mgr_locs.indexer] - # TODO: taker.max()+1 probably isnt the Technically Correct - # way of calling this? - taker = lib.maybe_indices_to_slice(taker, taker.max() + 1) + max_len = max(len(mgr_locs), taker.max() + 1) + taker = lib.maybe_indices_to_slice(taker, max_len) if isinstance(taker, slice): - nb = blk.getitem_block(taker) - nb.mgr_locs = mgr_locs + nb = blk.getitem_block(taker, new_mgr_locs=mgr_locs) else: # TODO: just use getitem_block anyway? nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs) From e78570d3c73411bfb9dff3dd4926333ded488d04 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 17 Apr 2020 13:25:25 -0700 Subject: [PATCH 25/32] never take --- pandas/core/internals/managers.py | 24 ++++++++++++++++++++---- pandas/core/ops/__init__.py | 3 +-- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b51944d0d9c26..8aa31bbb01720 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1253,12 +1253,21 @@ def reindex_indexer( return type(self).from_blocks(new_blocks, new_axes) - def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): + def _slice_take_blocks_ax0( + self, slice_or_indexer, fill_value=lib.no_default, only_slice: bool = False + ): """ Slice/take blocks along axis=0. 
Overloaded for SingleBlock + Parameters + ---------- + slice_or_indexer : slice, ndarray[bool], or list-like of ints + fill_value : scalar, default lib.no_default + only_slice : bool, default False + If True, we always return views on existing arrays, never copies. + Returns ------- new_blocks : list of Block @@ -1331,14 +1340,21 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): else: taker = blklocs[mgr_locs.indexer] max_len = max(len(mgr_locs), taker.max() + 1) - taker = lib.maybe_indices_to_slice(taker, max_len) + if only_slice: + taker = lib.maybe_indices_to_slice(taker, max_len) if isinstance(taker, slice): nb = blk.getitem_block(taker, new_mgr_locs=mgr_locs) + blocks.append(nb) + elif only_slice: + # GH#33597 slice instead of take, so we get + # views instead of copies + for i, ml in zip(taker, mgr_locs): + nb = blk.getitem_block([i], new_mgr_locs=ml) + blocks.append(nb) else: - # TODO: just use getitem_block anyway? nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs) - blocks.append(nb) + blocks.append(nb) return blocks diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 4065ac4ff68f8..8b1b0415fe4e3 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -332,8 +332,7 @@ def get_same_shape_values( left_ea = not isinstance(blk_vals, np.ndarray) - # TODO: joris says this is costly, see if we can optimize - rblks = rmgr._slice_take_blocks_ax0(locs.indexer) + rblks = rmgr._slice_take_blocks_ax0(locs.indexer, only_slice=True) # Assertions are disabled for performance, but should hold: # if left_ea: From 30f655b662fef6943ef3e46c6b1288b25240a67b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 25 Apr 2020 14:45:43 -0700 Subject: [PATCH 26/32] REF: move operate_blockwise to new file --- pandas/core/internals/managers.py | 2 + pandas/core/ops/__init__.py | 88 +----------------------------- pandas/core/ops/blockwise.py | 91 +++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 85 deletions(-) create mode 100644 pandas/core/ops/blockwise.py diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 44e877eecd172..c3ce288e3ad9b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1318,6 +1318,8 @@ def _slice_take_blocks_ax0( blocks.append(newblk) else: + # GH#32779 to avoid the performance penalty of copying, + # we may try to only slice taker = blklocs[mgr_locs.indexer] max_len = max(len(mgr_locs), taker.max() + 1) if only_slice: diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 4c532a0e22132..0b3775f876b39 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -4,13 +4,13 @@ This is not a public API. 
""" import operator -from typing import TYPE_CHECKING, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Optional, Set import numpy as np from pandas._libs import lib from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401 -from pandas._typing import ArrayLike, Level +from pandas._typing import Level from pandas.util._decorators import Appender from pandas.core.dtypes.common import is_list_like @@ -26,6 +26,7 @@ logical_op, ) from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401 +from pandas.core.ops.blockwise import operate_blockwise from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.dispatch import should_series_dispatch from pandas.core.ops.docstrings import ( @@ -294,89 +295,6 @@ def fill_binop(left, right, fill_value): # Dispatch logic -def operate_blockwise(left, right, array_op): - assert right._indexed_same(left) - - def get_same_shape_values( - lblk: "Block", rblk: "Block", left_ea: bool, right_ea: bool - ) -> Tuple[ArrayLike, ArrayLike]: - """ - Slice lblk.values to align with rblk. Squeeze if we have EAs. - """ - lvals = lblk.values - rvals = rblk.values - - # TODO(EA2D): with 2D EAs pnly this first clause would be needed - if not (left_ea or right_ea): - lvals = lvals[rblk.mgr_locs.indexer, :] - assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) - elif left_ea and right_ea: - assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) - elif right_ea: - # lvals are 2D, rvals are 1D - lvals = lvals[rblk.mgr_locs.indexer, :] - assert lvals.shape[0] == 1, lvals.shape - lvals = lvals[0, :] - else: - # lvals are 1D, rvals are 2D - assert rvals.shape[0] == 1, rvals.shape - rvals = rvals[0, :] - - return lvals, rvals - - res_blks: List["Block"] = [] - rmgr = right._mgr - for n, blk in enumerate(left._mgr.blocks): - locs = blk.mgr_locs - blk_vals = blk.values - - left_ea = not isinstance(blk_vals, np.ndarray) - - rblks = rmgr._slice_take_blocks_ax0(locs.indexer, only_slice=True) - - # Assertions are disabled for performance, but should hold: - # if left_ea: - # assert len(locs) == 1, locs - # assert len(rblks) == 1, rblks - # assert rblks[0].shape[0] == 1, rblks[0].shape - - for k, rblk in enumerate(rblks): - right_ea = not isinstance(rblk.values, np.ndarray) - - lvals, rvals = get_same_shape_values(blk, rblk, left_ea, right_ea) - - res_values = array_op(lvals, rvals) - if left_ea and not right_ea and hasattr(res_values, "reshape"): - res_values = res_values.reshape(1, -1) - nbs = rblk._split_op_result(res_values) - - # Assertions are disabled for performance, but should hold: - # if right_ea or left_ea: - # assert len(nbs) == 1 - # else: - # assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) - - for nb in nbs: - # Reset mgr_locs to correspond to our original DataFrame - nblocs = locs.as_array[nb.mgr_locs.indexer] - nb.mgr_locs = nblocs - # Assertions are disabled for performance, but should hold: - # assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) - # assert all(x in locs.as_array for x in nb.mgr_locs.as_array) - - res_blks.extend(nbs) - - # Assertions are disabled for performance, but should hold: - # slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array} - # nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks) - # assert nlocs == len(left.columns), (nlocs, len(left.columns)) - # assert len(slocs) == nlocs, (len(slocs), nlocs) - # assert slocs == set(range(nlocs)), slocs - - new_mgr = type(rmgr)(res_blks, axes=rmgr.axes, 
do_integrity_check=False) - return new_mgr - - def dispatch_to_series(left, right, func, str_rep=None, axis=None): """ Evaluate the frame operation func(left, right) by evaluating diff --git a/pandas/core/ops/blockwise.py b/pandas/core/ops/blockwise.py new file mode 100644 index 0000000000000..509d8b9cc2d7b --- /dev/null +++ b/pandas/core/ops/blockwise.py @@ -0,0 +1,91 @@ +from typing import TYPE_CHECKING, List, Tuple + +import numpy as np + +from pandas._typing import ArrayLike + +if TYPE_CHECKING: + from pandas.core.internals.blocks import Block # noqa:F401 + + +def operate_blockwise(left, right, array_op): + assert right._indexed_same(left) + + def get_same_shape_values( + lblk: "Block", rblk: "Block", left_ea: bool, right_ea: bool + ) -> Tuple[ArrayLike, ArrayLike]: + """ + Slice lblk.values to align with rblk. Squeeze if we have EAs. + """ + lvals = lblk.values + rvals = rblk.values + + # TODO(EA2D): with 2D EAs pnly this first clause would be needed + if not (left_ea or right_ea): + lvals = lvals[rblk.mgr_locs.indexer, :] + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif left_ea and right_ea: + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif right_ea: + # lvals are 2D, rvals are 1D + lvals = lvals[rblk.mgr_locs.indexer, :] + assert lvals.shape[0] == 1, lvals.shape + lvals = lvals[0, :] + else: + # lvals are 1D, rvals are 2D + assert rvals.shape[0] == 1, rvals.shape + rvals = rvals[0, :] + + return lvals, rvals + + res_blks: List["Block"] = [] + rmgr = right._mgr + for n, blk in enumerate(left._mgr.blocks): + locs = blk.mgr_locs + blk_vals = blk.values + + left_ea = not isinstance(blk_vals, np.ndarray) + + rblks = rmgr._slice_take_blocks_ax0(locs.indexer, only_slice=True) + + # Assertions are disabled for performance, but should hold: + # if left_ea: + # assert len(locs) == 1, locs + # assert len(rblks) == 1, rblks + # assert rblks[0].shape[0] == 1, rblks[0].shape + + for k, rblk in enumerate(rblks): + right_ea = not isinstance(rblk.values, np.ndarray) + + lvals, rvals = get_same_shape_values(blk, rblk, left_ea, right_ea) + + res_values = array_op(lvals, rvals) + if left_ea and not right_ea and hasattr(res_values, "reshape"): + res_values = res_values.reshape(1, -1) + nbs = rblk._split_op_result(res_values) + + # Assertions are disabled for performance, but should hold: + # if right_ea or left_ea: + # assert len(nbs) == 1 + # else: + # assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) + + for nb in nbs: + # Reset mgr_locs to correspond to our original DataFrame + nblocs = locs.as_array[nb.mgr_locs.indexer] + nb.mgr_locs = nblocs + # Assertions are disabled for performance, but should hold: + # assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) + # assert all(x in locs.as_array for x in nb.mgr_locs.as_array) + + res_blks.extend(nbs) + + # Assertions are disabled for performance, but should hold: + # slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array} + # nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks) + # assert nlocs == len(left.columns), (nlocs, len(left.columns)) + # assert len(slocs) == nlocs, (len(slocs), nlocs) + # assert slocs == set(range(nlocs)), slocs + + new_mgr = type(rmgr)(res_blks, axes=rmgr.axes, do_integrity_check=False) + return new_mgr From 30d658082d98e32320fe750c0c756ff8f386beb3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 25 Apr 2020 16:33:52 -0700 Subject: [PATCH 27/32] ndim compat --- pandas/core/arrays/datetimelike.py | 3 ++- pandas/core/ops/__init__.py | 1 - 2 
files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a864ab03e0ac5..77521e9a71c15 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1359,8 +1359,9 @@ def _addsub_object_array(self, other: np.ndarray, op): result : same class as self """ assert op in [operator.add, operator.sub] - if len(other) == 1 and self.ndim == other.ndim == 1: + if len(other) == 1: # If both 1D then broadcasting is unambiguous + # TODO(EA2D): require self.ndim == other.ndim here return op(self, other[0]) warnings.warn( diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 0b3775f876b39..6fe33534b0bb3 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -58,7 +58,6 @@ if TYPE_CHECKING: from pandas import DataFrame # noqa:F401 - from pandas.core.internals.blocks import Block # noqa: F401 # ----------------------------------------------------------------------------- # constants From f86deb4d8ef7962b416472e258c6e9e68ae438a4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 26 Apr 2020 12:40:35 -0700 Subject: [PATCH 28/32] separate out helper function --- pandas/core/ops/blockwise.py | 57 ++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/pandas/core/ops/blockwise.py b/pandas/core/ops/blockwise.py index 509d8b9cc2d7b..2340a8a485b3d 100644 --- a/pandas/core/ops/blockwise.py +++ b/pandas/core/ops/blockwise.py @@ -11,33 +11,6 @@ def operate_blockwise(left, right, array_op): assert right._indexed_same(left) - def get_same_shape_values( - lblk: "Block", rblk: "Block", left_ea: bool, right_ea: bool - ) -> Tuple[ArrayLike, ArrayLike]: - """ - Slice lblk.values to align with rblk. Squeeze if we have EAs. - """ - lvals = lblk.values - rvals = rblk.values - - # TODO(EA2D): with 2D EAs pnly this first clause would be needed - if not (left_ea or right_ea): - lvals = lvals[rblk.mgr_locs.indexer, :] - assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) - elif left_ea and right_ea: - assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) - elif right_ea: - # lvals are 2D, rvals are 1D - lvals = lvals[rblk.mgr_locs.indexer, :] - assert lvals.shape[0] == 1, lvals.shape - lvals = lvals[0, :] - else: - # lvals are 1D, rvals are 2D - assert rvals.shape[0] == 1, rvals.shape - rvals = rvals[0, :] - - return lvals, rvals - res_blks: List["Block"] = [] rmgr = right._mgr for n, blk in enumerate(left._mgr.blocks): @@ -57,7 +30,7 @@ def get_same_shape_values( for k, rblk in enumerate(rblks): right_ea = not isinstance(rblk.values, np.ndarray) - lvals, rvals = get_same_shape_values(blk, rblk, left_ea, right_ea) + lvals, rvals = _get_same_shape_values(blk, rblk, left_ea, right_ea) res_values = array_op(lvals, rvals) if left_ea and not right_ea and hasattr(res_values, "reshape"): @@ -89,3 +62,31 @@ def get_same_shape_values( new_mgr = type(rmgr)(res_blks, axes=rmgr.axes, do_integrity_check=False) return new_mgr + + +def _get_same_shape_values( + lblk: "Block", rblk: "Block", left_ea: bool, right_ea: bool +) -> Tuple[ArrayLike, ArrayLike]: + """ + Slice lblk.values to align with rblk. Squeeze if we have EAs. 
+ """ + lvals = lblk.values + rvals = rblk.values + + # TODO(EA2D): with 2D EAs pnly this first clause would be needed + if not (left_ea or right_ea): + lvals = lvals[rblk.mgr_locs.indexer, :] + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif left_ea and right_ea: + assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) + elif right_ea: + # lvals are 2D, rvals are 1D + lvals = lvals[rblk.mgr_locs.indexer, :] + assert lvals.shape[0] == 1, lvals.shape + lvals = lvals[0, :] + else: + # lvals are 1D, rvals are 2D + assert rvals.shape[0] == 1, rvals.shape + rvals = rvals[0, :] + + return lvals, rvals From 0c46531063ea61dbd4447ca68b7947e426b71ccf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 11 May 2020 13:38:38 -0700 Subject: [PATCH 29/32] update per comments --- pandas/_libs/internals.pyx | 2 +- pandas/core/arrays/datetimelike.py | 4 +++ pandas/core/internals/managers.py | 27 +++++++++++++++------ pandas/core/ops/blockwise.py | 6 ++++- pandas/tests/arithmetic/test_timedelta64.py | 3 +-- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 1e53b789aa05c..dc8b0dd39bb82 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -48,7 +48,7 @@ cdef class BlockPlacement: else: # Cython memoryview interface requires ndarray to be writeable. arr = np.require(val, dtype=np.int64, requirements='W') - assert arr.ndim == 1 + assert arr.ndim == 1, arr.shape self._as_array = arr self._has_array = True diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f1e01186e99f0..b4c5868065eaa 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -95,6 +95,10 @@ def _validate_comparison_value(self, other): @unpack_zerodim_and_defer(opname) def wrapper(self, other): + if self.ndim > 1 and getattr(other, "shape", None) == self.shape: + # TODO: handle 2D-like listlikes + return op(self.ravel(), other.ravel()).reshape(self.shape) + try: other = _validate_comparison_value(self, other) except InvalidComparison: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5e6d2ccd38301..238c68d6b6cd8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1247,6 +1247,7 @@ def _slice_take_blocks_ax0( fill_value : scalar, default lib.no_default only_slice : bool, default False If True, we always return views on existing arrays, never copies. + This is used when called from ops.blockwise.operate_blockwise. 
Returns ------- @@ -1271,14 +1272,24 @@ def _slice_take_blocks_ax0( if allow_fill and fill_value is None: _, fill_value = maybe_promote(blk.dtype) - return [ - blk.take_nd( - slobj, - axis=0, - new_mgr_locs=slice(0, sllen), - fill_value=fill_value, - ) - ] + if not allow_fill and only_slice: + # GH#33597 slice instead of take, so we get + # views instead of copies + blocks = [] + for i, ml in enumerate(slobj): + nb = blk.getitem_block([ml], new_mgr_locs=i) + print(nb.shape, np.values.shape) + blocks.append(nb) + return blocks + else: + return [ + blk.take_nd( + slobj, + axis=0, + new_mgr_locs=slice(0, sllen), + fill_value=fill_value, + ) + ] if sl_type in ("slice", "mask"): blknos = self.blknos[slobj] diff --git a/pandas/core/ops/blockwise.py b/pandas/core/ops/blockwise.py index 2340a8a485b3d..153cd2a0a0c76 100644 --- a/pandas/core/ops/blockwise.py +++ b/pandas/core/ops/blockwise.py @@ -9,7 +9,8 @@ def operate_blockwise(left, right, array_op): - assert right._indexed_same(left) + # At this point we have already checked + # assert right._indexed_same(left) res_blks: List["Block"] = [] rmgr = right._mgr @@ -73,6 +74,9 @@ def _get_same_shape_values( lvals = lblk.values rvals = rblk.values + # Require that the indexing into lvals be slice-like + assert rblk.mgr_locs.is_slice_like, rblk.mgr_locs + # TODO(EA2D): with 2D EAs pnly this first clause would be needed if not (left_ea or right_ea): lvals = lvals[rblk.mgr_locs.indexer, :] diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index d540ff923c929..080fdf53b46e6 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -552,8 +552,7 @@ def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture): obj = tm.box_expected(tdi, box) other = tm.box_expected(dti, box) - warn = PerformanceWarning if box is not pd.DataFrame else None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = obj + other.astype(object) tm.assert_equal(result, other) From 32e70d880e22ee7a850669a9ff3742087c81c468 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 12 May 2020 09:49:25 -0700 Subject: [PATCH 30/32] update per comments --- asv_bench/benchmarks/arithmetic.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index d670566ff160d..e4743601ef207 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -106,19 +106,10 @@ class FrameWithFrameWide: params = [ [ + # GH#32779 has discussion of which operators are included here operator.add, - operator.sub, - operator.mul, - operator.truediv, operator.floordiv, - operator.pow, - operator.mod, - operator.eq, - operator.ne, operator.gt, - operator.ge, - operator.lt, - operator.le, ] ] param_names = ["op"] @@ -127,13 +118,20 @@ def setup(self, op): # we choose dtypes so as to make the blocks # a) not perfectly match between right and left # b) appreciably bigger than single columns - arr = np.random.randn(10 ** 6).reshape(500, 2000).astype(np.float64) - df = pd.DataFrame(arr) - df[1000] = df[1000].astype(np.float32) - df.iloc[:, 1000:] = df.iloc[:, 1000:].astype(np.float32) + n_cols = 2000 + n_rows = 500 + + # construct dataframe with 2 blocks + arr1 = np.random.randn(n_rows, int(n_cols / 2)).astype("f8") + arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("f4") + df = pd.concat( + [pd.DataFrame(arr1), 
pd.DataFrame(arr2)], axis=1, ignore_index=True, + ) + # should already be the case, but just to be sure df._consolidate_inplace() # TODO: GH#33198 the setting here shoudlnt need two steps + arr = np.random.randn(10 ** 6).reshape(n_rows, n_cols).astype(np.float64) df2 = pd.DataFrame(arr) df2[1000] = df2[1000].astype(np.int64) df2.iloc[:, 500:1500] = df2.iloc[:, 500:1500].astype(np.int64) From 41e8e789ece3476bc7998665a6a0a7d10e7a1f4f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 16 May 2020 09:54:53 -0700 Subject: [PATCH 31/32] update asv --- asv_bench/benchmarks/arithmetic.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index e4743601ef207..08a11ba2607a5 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -131,14 +131,19 @@ def setup(self, op): df._consolidate_inplace() # TODO: GH#33198 the setting here shoudlnt need two steps - arr = np.random.randn(10 ** 6).reshape(n_rows, n_cols).astype(np.float64) - df2 = pd.DataFrame(arr) - df2[1000] = df2[1000].astype(np.int64) - df2.iloc[:, 500:1500] = df2.iloc[:, 500:1500].astype(np.int64) + arr1 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8") + arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("i8") + arr3 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8") + df2 = pd.concat( + [pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)], + axis=1, + ignore_index=True, + ) + # should already be the case, but just to be sure df2._consolidate_inplace() self.left = df - self.right = df + self.right = df2 def time_op_different_blocks(self, op): # blocks (and dtypes) are not aligned From 8c4f951684183333070a9f9ae32b19c7c9c9853a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 17 May 2020 14:34:47 -0700 Subject: [PATCH 32/32] requested edits --- pandas/core/internals/managers.py | 9 ++++----- pandas/core/ops/blockwise.py | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8cf0dbd8bd7d8..590b92481feca 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1311,11 +1311,10 @@ def _slice_take_blocks_ax0( if not allow_fill and only_slice: # GH#33597 slice instead of take, so we get # views instead of copies - blocks = [] - for i, ml in enumerate(slobj): - nb = blk.getitem_block([ml], new_mgr_locs=i) - print(nb.shape, np.values.shape) - blocks.append(nb) + blocks = [ + blk.getitem_block([ml], new_mgr_locs=i) + for i, ml in enumerate(slobj) + ] return blocks else: return [ diff --git a/pandas/core/ops/blockwise.py b/pandas/core/ops/blockwise.py index 153cd2a0a0c76..f41a30b136637 100644 --- a/pandas/core/ops/blockwise.py +++ b/pandas/core/ops/blockwise.py @@ -44,13 +44,7 @@ def operate_blockwise(left, right, array_op): # else: # assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) - for nb in nbs: - # Reset mgr_locs to correspond to our original DataFrame - nblocs = locs.as_array[nb.mgr_locs.indexer] - nb.mgr_locs = nblocs - # Assertions are disabled for performance, but should hold: - # assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) - # assert all(x in locs.as_array for x in nb.mgr_locs.as_array) + _reset_block_mgr_locs(nbs, locs) res_blks.extend(nbs) @@ -65,6 +59,18 @@ def operate_blockwise(left, right, array_op): return new_mgr +def _reset_block_mgr_locs(nbs: List["Block"], locs): + """ + Reset 
mgr_locs to correspond to our original DataFrame. + """ + for nb in nbs: + nblocs = locs.as_array[nb.mgr_locs.indexer] + nb.mgr_locs = nblocs + # Assertions are disabled for performance, but should hold: + # assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) + # assert all(x in locs.as_array for x in nb.mgr_locs.as_array) + + def _get_same_shape_values( lblk: "Block", rblk: "Block", left_ea: bool, right_ea: bool ) -> Tuple[ArrayLike, ArrayLike]:
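
Taken together, the series replaces the per-column dict comprehension in dispatch_to_series with operate_blockwise (pandas/core/ops/blockwise.py) for frame-with-frame operations, pairing each left block with right-hand blocks obtained via _slice_take_blocks_ax0(..., only_slice=True) so the right operands are views rather than copies. Below is a minimal sketch of exercising the new path, modeled on the FrameWithFrameWide asv case added in the series; the row/column counts and dtype mix are illustrative assumptions, not values taken from the patches.

import numpy as np
import pandas as pd

n_rows, n_cols = 500, 2_000

# Two float blocks on the left, similar to the benchmark's setup step.
left = pd.concat(
    [
        pd.DataFrame(np.random.randn(n_rows, n_cols // 2).astype("f8")),
        pd.DataFrame(np.random.randn(n_rows, n_cols // 2).astype("f4")),
    ],
    axis=1,
    ignore_index=True,
)

# Three blocks on the right so the block boundaries do not line up with left's.
right = pd.concat(
    [
        pd.DataFrame(np.random.randn(n_rows, n_cols // 4).astype("f8")),
        pd.DataFrame(np.random.randn(n_rows, n_cols // 2).astype("i8")),
        pd.DataFrame(np.random.randn(n_rows, n_cols // 4).astype("f8")),
    ],
    axis=1,
    ignore_index=True,
)

# DataFrame <op> DataFrame now evaluates block-by-block instead of building
# one intermediate Series per column and stitching the results back together.
result = left + right
assert result.shape == (n_rows, n_cols)

This is roughly the workload the new time_op_different_blocks / time_op_same_blocks benchmarks measure, and the case the v1.1.0 whatsnew entry about arithmetic between two DataFrame objects refers to.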